#!/usr/bin/env python3
"""
San Francisco Inspection Collector - GUI Version
Professional Windows application with status indicators
"""

import tkinter as tk
from tkinter import ttk, messagebox, scrolledtext
import threading
import os
import sys
import json
import time
import base64
import requests
from datetime import datetime
from queue import Queue
import webbrowser

# Selenium bootstrap: when the package is missing, install it (together with
# requests) into the running interpreter's environment and then import it.
# NOTE(review): `requests` is imported unconditionally earlier in this file,
# so a missing `requests` fails before this fallback gets a chance to run.
try:
    import selenium  # noqa: F401  (availability probe; real imports follow)
except ImportError:
    import importlib
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "selenium", "requests"])
    # Packages installed while the program is running are only guaranteed to
    # be visible to the import system after the finder caches are invalidated.
    importlib.invalidate_caches()

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class SFCollectorGUI:
    def __init__(self, root):
        """Build the collector window on *root* and restore any saved settings.

        Args:
            root: The Tk root window that hosts the application.
        """
        self.root = root
        root.title("SF Inspection Collector v2.0")
        root.geometry("900x700")
        root.resizable(True, True)
        root.configure(bg='#f0f0f0')  # window background colour

        # User-editable settings; setup_gui() binds them to the entry widgets
        self.server_url = tk.StringVar(value="https://cleankitchens.org/data/api_receiver.php")
        self.api_key = tk.StringVar(value="your-secret-api-key-change-this")
        self.collect_limit = tk.IntVar(value=10)

        # Run state: worker flag, Selenium driver handle and the queue that
        # log() fills when it is called off the main thread
        self.is_running = False
        self.driver = None
        self.message_queue = Queue()

        # Counters displayed in the "Statistics" panel
        self.stats = dict.fromkeys(('total', 'collected', 'uploaded', 'failed', 'pdfs'), 0)

        self.setup_gui()
        self.load_settings()
        
    def setup_gui(self):
        """Create all widgets and lay them out in the root window.

        Builds, top to bottom: title, settings form, status indicators,
        statistics counters, progress bar, activity log, control buttons and
        a footer line.  Widgets that other methods update are stored on the
        instance: ``status_indicators``, ``stats_labels``, ``progress_var``,
        ``progress_bar``, ``log_text``, ``start_button``, ``stop_button`` and
        ``footer_label``.
        """

        # Main container
        main_frame = ttk.Frame(self.root, padding="10")
        main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))

        # Configure grid weights
        self.root.columnconfigure(0, weight=1)
        self.root.rowconfigure(0, weight=1)
        main_frame.columnconfigure(1, weight=1)

        # Title
        title_label = tk.Label(main_frame, text="🌁 San Francisco Inspection Collector",
                               font=('Arial', 16, 'bold'), bg='#f0f0f0')
        title_label.grid(row=0, column=0, columnspan=3, pady=10)

        # Settings Frame
        settings_frame = ttk.LabelFrame(main_frame, text="Settings", padding="10")
        settings_frame.grid(row=1, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=10)

        # Server URL
        ttk.Label(settings_frame, text="Server URL:").grid(row=0, column=0, sticky=tk.W, pady=5)
        url_entry = ttk.Entry(settings_frame, textvariable=self.server_url, width=50)
        url_entry.grid(row=0, column=1, sticky=(tk.W, tk.E), pady=5)
        ttk.Button(settings_frame, text="Test", command=self.test_server).grid(row=0, column=2, padx=5)

        # API Key (masked while typing)
        ttk.Label(settings_frame, text="API Key:").grid(row=1, column=0, sticky=tk.W, pady=5)
        ttk.Entry(settings_frame, textvariable=self.api_key, width=50, show="*").grid(row=1, column=1, sticky=(tk.W, tk.E), pady=5)

        # Collect Limit
        ttk.Label(settings_frame, text="Inspections to Collect:").grid(row=2, column=0, sticky=tk.W, pady=5)
        limit_frame = ttk.Frame(settings_frame)
        limit_frame.grid(row=2, column=1, sticky=tk.W, pady=5)
        # The range starts at 0 because 0 is the documented "collect all"
        # value (see the hint label below and the limit check in the worker).
        ttk.Spinbox(limit_frame, from_=0, to=1000, textvariable=self.collect_limit, width=10).pack(side=tk.LEFT)
        ttk.Label(limit_frame, text="(set to 0 for all)").pack(side=tk.LEFT, padx=10)

        settings_frame.columnconfigure(1, weight=1)

        # Status Frame
        status_frame = ttk.LabelFrame(main_frame, text="Collection Status", padding="10")
        status_frame.grid(row=2, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=10)

        # Progress indicators, keyed by the names update_status() accepts
        self.status_indicators = {}

        # (display label, indicator key) pairs
        status_items = [
            ("Server Connection", "server"),
            ("Browser Started", "browser"),
            ("Site Accessed", "site"),
            ("Data Collection", "collection"),
            ("PDF Download", "pdf"),
            ("Server Upload", "upload")
        ]

        # Three label/indicator pairs per row, i.e. six grid columns
        for i, (label, key) in enumerate(status_items):
            row = i // 3
            col = (i % 3) * 2

            ttk.Label(status_frame, text=label + ":").grid(row=row, column=col, sticky=tk.W, padx=5, pady=3)

            indicator = tk.Label(status_frame, text="⚪ Waiting", font=('Arial', 10))
            indicator.grid(row=row, column=col+1, sticky=tk.W, padx=5, pady=3)
            self.status_indicators[key] = indicator

        # Statistics Frame
        stats_frame = ttk.LabelFrame(main_frame, text="Statistics", padding="10")
        stats_frame.grid(row=3, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=10)

        # Value labels, keyed by the matching self.stats counter name
        self.stats_labels = {}
        stats_items = [
            ("Total Found:", "total"),
            ("Collected:", "collected"),
            ("Uploaded:", "uploaded"),
            ("PDFs Downloaded:", "pdfs"),
            ("Failed:", "failed")
        ]

        # All counters share one row: caption in even columns, value in odd
        for i, (label, key) in enumerate(stats_items):
            col = i * 2
            ttk.Label(stats_frame, text=label).grid(row=0, column=col, sticky=tk.W, padx=5)

            stat_label = ttk.Label(stats_frame, text="0", font=('Arial', 10, 'bold'))
            stat_label.grid(row=0, column=col+1, sticky=tk.W, padx=5)
            self.stats_labels[key] = stat_label

        # Progress Bar (driven through progress_var, 0-100)
        self.progress_var = tk.DoubleVar()
        self.progress_bar = ttk.Progressbar(main_frame, variable=self.progress_var,
                                           maximum=100, length=400)
        self.progress_bar.grid(row=4, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=10)

        # Log Frame
        log_frame = ttk.LabelFrame(main_frame, text="Activity Log", padding="5")
        log_frame.grid(row=5, column=0, columnspan=3, sticky=(tk.W, tk.E, tk.N, tk.S), pady=10)

        # Log Text Area
        self.log_text = scrolledtext.ScrolledText(log_frame, height=12, width=80, wrap=tk.WORD)
        self.log_text.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))

        # Configure tags for colored text (tag names match the log levels)
        self.log_text.tag_config('success', foreground='green')
        self.log_text.tag_config('error', foreground='red')
        self.log_text.tag_config('warning', foreground='orange')
        self.log_text.tag_config('info', foreground='blue')

        # Control Buttons
        button_frame = ttk.Frame(main_frame)
        button_frame.grid(row=6, column=0, columnspan=3, pady=10)

        self.start_button = ttk.Button(button_frame, text="▶ Start Collection",
                                      command=self.start_collection, width=20)
        self.start_button.pack(side=tk.LEFT, padx=5)

        # Stop starts disabled; start_collection() enables it for a run
        self.stop_button = ttk.Button(button_frame, text="⏹ Stop",
                                     command=self.stop_collection, width=20, state='disabled')
        self.stop_button.pack(side=tk.LEFT, padx=5)

        ttk.Button(button_frame, text="📁 Open Data Folder",
                  command=self.open_data_folder, width=20).pack(side=tk.LEFT, padx=5)

        ttk.Button(button_frame, text="💾 Save Settings",
                  command=self.save_settings, width=20).pack(side=tk.LEFT, padx=5)

        # Configure grid weights for resizing: only the log area grows
        main_frame.rowconfigure(5, weight=1)
        log_frame.rowconfigure(0, weight=1)
        log_frame.columnconfigure(0, weight=1)

        # Footer
        footer = ttk.Label(main_frame, text="Ready to collect inspection data",
                          relief=tk.SUNKEN, anchor=tk.W)
        footer.grid(row=7, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=5)
        self.footer_label = footer
        
    def update_status(self, key, status, message=""):
        """Update status indicator"""
        if key in self.status_indicators:
            indicator = self.status_indicators[key]
            
            if status == "working":
                indicator.config(text="🔵 Working...", fg="blue")
            elif status == "success":
                indicator.config(text="✅ Success", fg="green")
            elif status == "error":
                indicator.config(text="❌ Failed", fg="red")
            elif status == "warning":
                indicator.config(text="⚠️ Warning", fg="orange")
            else:
                indicator.config(text="⚪ Waiting", fg="gray")
        
        if message:
            self.log(message, status)
    
    def update_stats(self):
        """Update statistics display"""
        for key, label in self.stats_labels.items():
            label.config(text=str(self.stats.get(key, 0)))
    
    def log(self, message, level="info"):
        """Add message to log with color coding"""
        timestamp = datetime.now().strftime("%H:%M:%S")
        
        # Ensure we're in the main thread
        if threading.current_thread() != threading.main_thread():
            self.message_queue.put((message, level))
            return
        
        self.log_text.insert(tk.END, f"[{timestamp}] ", 'info')
        
        if level == "success":
            self.log_text.insert(tk.END, f"✓ {message}\n", 'success')
        elif level == "error":
            self.log_text.insert(tk.END, f"✗ {message}\n", 'error')
        elif level == "warning":
            self.log_text.insert(tk.END, f"⚠ {message}\n", 'warning')
        else:
            self.log_text.insert(tk.END, f"{message}\n")
        
        self.log_text.see(tk.END)
        self.root.update_idletasks()
    
    def process_message_queue(self):
        """Process queued messages from thread"""
        while not self.message_queue.empty():
            message, level = self.message_queue.get()
            self.log(message, level)
        
        if self.is_running:
            self.root.after(100, self.process_message_queue)
    
    def test_server(self):
        """Probe the configured server URL and report the result to the user.

        Sends a GET request (5 second timeout) and updates the "server"
        indicator:

        * request failed            -> error indicator and error dialog
        * HTTP status other than 200 -> error indicator and error dialog
        * 200 with JSON ``{"status": "ready"}`` -> success and info dialog
        * 200 with any other body (including non-JSON) -> warning indicator

        Runs synchronously on the calling (GUI) thread.
        """
        self.update_status("server", "working")

        try:
            response = requests.get(self.server_url.get(), timeout=5)
        except Exception as e:
            # Broad on purpose: this is a UI callback, so any failure to reach
            # the server must end up in front of the user, not on stderr.
            self.update_status("server", "error", f"Cannot connect to server: {str(e)}")
            messagebox.showerror("Connection Error", f"Cannot connect to server:\n{str(e)}")
            return

        if response.status_code != 200:
            self.update_status("server", "error", f"Server returned status {response.status_code}")
            messagebox.showerror("Error", f"Server returned status {response.status_code}")
            return

        # The server did answer; an unparsable body is an unclear status,
        # not a connection failure.
        try:
            data = response.json()
        except ValueError:
            data = None

        if isinstance(data, dict) and data.get('status') == 'ready':
            self.update_status("server", "success", "Server connection successful")
            messagebox.showinfo("Success", "Server is ready and responding!")
        else:
            self.update_status("server", "warning", "Server responded but status unclear")
    
    def start_collection(self):
        """Validate the settings, reset the UI and launch the worker thread.

        Does nothing when a collection is already running.  Shows a warning
        and returns without starting when the API key is still the
        placeholder value or when the collect limit field does not contain a
        whole number.  Otherwise resets indicators, statistics, progress bar
        and log, switches the buttons to their "running" state, starts
        ``collection_worker`` on a daemon thread and begins polling the
        message queue.
        """
        if self.is_running:
            return

        # Validate settings
        if self.api_key.get() == "your-secret-api-key-change-this":
            messagebox.showwarning("Configuration", "Please set your API key first!")
            return

        # Reject an unreadable limit here, on the GUI thread, instead of
        # letting the worker fail on it after the browser has been started.
        try:
            self.collect_limit.get()
        except (tk.TclError, ValueError):
            messagebox.showwarning("Configuration",
                                   "Inspections to Collect must be a whole number (0 for all)")
            return

        # Reset status indicators
        for key in self.status_indicators:
            self.update_status(key, "waiting")

        # Reset statistics
        self.stats = {'total': 0, 'collected': 0, 'uploaded': 0, 'failed': 0, 'pdfs': 0}
        self.update_stats()

        # Reset the progress bar, which a previous run leaves at 100
        self.progress_var.set(0)

        # Clear log
        self.log_text.delete(1.0, tk.END)

        # Update UI
        self.is_running = True
        self.start_button.config(state='disabled')
        self.stop_button.config(state='normal')
        self.footer_label.config(text="Collection in progress...")

        # Start collection thread
        self.collection_thread = threading.Thread(target=self.collection_worker, daemon=True)
        self.collection_thread.start()

        # Start message queue processor
        self.process_message_queue()
    
    def collection_worker(self):
        """Worker thread for collection"""
        try:
            # Test server
            self.update_status("server", "working")
            self.log("Testing server connection...")
            
            try:
                headers = {'X-API-Key': self.api_key.get()}
                response = requests.get(self.server_url.get(), timeout=10)
                if response.status_code == 200:
                    self.update_status("server", "success", "Server connection established")
                    can_upload = True
                else:
                    self.update_status("server", "warning", "Server not responding - will save locally only")
                    can_upload = False
            except Exception as e:
                self.update_status("server", "warning", f"Cannot connect to server - will save locally")
                can_upload = False
            
            # Setup browser
            self.update_status("browser", "working")
            self.log("Starting Chrome browser...")
            
            chrome_options = Options()
            chrome_options.add_argument("--window-size=1920,1080")
            chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
            chrome_options.add_experimental_option('useAutomationExtension', False)
            
            try:
                self.driver = webdriver.Chrome(options=chrome_options)
                self.update_status("browser", "success", "Chrome browser started")
            except Exception as e:
                self.update_status("browser", "error", f"Failed to start Chrome: {str(e)}")
                self.log("Make sure Chrome and ChromeDriver are installed", "error")
                return
            
            # Access site
            self.update_status("site", "working")
            self.log("Accessing SF inspection site...")
            
            base_url = "https://inspections.myhealthdepartment.com/san-francisco"
            self.driver.get(base_url)
            time.sleep(3)
            
            if "403" in self.driver.title:
                self.update_status("site", "error", "Site blocked access (403 Forbidden)")
                self.log("Cannot access site - must run from allowed network", "error")
                return
            
            self.update_status("site", "success", "Site accessed successfully")
            
            # Find inspections
            self.update_status("collection", "working")
            self.log("Finding inspection links...")
            
            inspections = []
            links = self.driver.find_elements(By.TAG_NAME, "a")
            
            for link in links:
                href = link.get_attribute("href") or ""
                if "inspectionID=" in href:
                    import re
                    match = re.search(r'inspectionID=([A-F0-9\-]+)', href)
                    if match:
                        inspections.append({
                            'id': match.group(1),
                            'url': href,
                            'name': link.text.strip()
                        })
            
            self.stats['total'] = len(inspections)
            self.update_stats()
            self.log(f"Found {len(inspections)} inspections", "success")
            
            # Limit collection
            limit = self.collect_limit.get()
            if limit > 0:
                inspections = inspections[:limit]
            
            # Collect each inspection
            self.update_status("collection", "working")
            data_dir = f"sf_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
            os.makedirs(data_dir, exist_ok=True)
            os.makedirs(f"{data_dir}/json", exist_ok=True)
            os.makedirs(f"{data_dir}/pdfs", exist_ok=True)
            
            for i, inspection in enumerate(inspections, 1):
                if not self.is_running:
                    break
                
                # Update progress
                progress = (i / len(inspections)) * 100
                self.progress_var.set(progress)
                
                self.log(f"[{i}/{len(inspections)}] Collecting: {inspection['name']}")
                
                try:
                    # Navigate to inspection
                    self.driver.get(inspection['url'])
                    time.sleep(2)
                    
                    # Collect data
                    data = {
                        'inspection_id': inspection['id'],
                        'facility_name': inspection['name'],
                        'url': inspection['url'],
                        'collected_at': datetime.now().isoformat()
                    }
                    
                    # Get page text
                    body = self.driver.find_element(By.TAG_NAME, "body")
                    data['page_text'] = body.text
                    
                    # Extract fields
                    import re
                    patterns = {
                        'address': r'(?:Address|Location)[:\s]+([^\n]+)',
                        'date': r'(?:Date|Inspection Date)[:\s]+(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})',
                        'score': r'(?:Score|Points)[:\s]+(\d+)',
                        'grade': r'(?:Grade|Rating)[:\s]+([A-F])'
                    }
                    
                    for field, pattern in patterns.items():
                        match = re.search(pattern, data['page_text'], re.IGNORECASE)
                        if match:
                            data[field] = match.group(1).strip()
                    
                    # Update stats
                    self.stats['collected'] += 1
                    self.update_stats()
                    self.update_status("collection", "success")
                    
                    # Try to download PDF
                    self.update_status("pdf", "working")
                    pdf_url = f"https://inspections.myhealthdepartment.com/san-francisco/print/?task=getPrintable&path=san-francisco&pKey={inspection['id']},{inspection['id']}"
                    
                    try:
                        cookies = self.driver.get_cookies()
                        session = requests.Session()
                        for cookie in cookies:
                            session.cookies.set(cookie['name'], cookie['value'])
                        
                        response = session.get(pdf_url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=30)
                        
                        if response.status_code == 200 and response.content.startswith(b'%PDF'):
                            pdf_path = f"{data_dir}/pdfs/{inspection['id']}.pdf"
                            with open(pdf_path, 'wb') as f:
                                f.write(response.content)
                            
                            data['pdf_size'] = len(response.content)
                            data['pdf_data'] = base64.b64encode(response.content).decode('utf-8')
                            
                            self.stats['pdfs'] += 1
                            self.update_stats()
                            self.update_status("pdf", "success")
                            self.log(f"  ✓ PDF downloaded ({len(response.content):,} bytes)", "success")
                        else:
                            self.update_status("pdf", "warning")
                            
                    except Exception as e:
                        self.update_status("pdf", "warning")
                        self.log(f"  PDF download failed: {str(e)}", "warning")
                    
                    # Save JSON
                    json_path = f"{data_dir}/json/{inspection['id']}.json"
                    with open(json_path, 'w') as f:
                        json.dump(data, f, indent=2)
                    
                    # Upload to server if available
                    if can_upload:
                        self.update_status("upload", "working")
                        
                        try:
                            headers = {
                                'X-API-Key': self.api_key.get(),
                                'Content-Type': 'application/json'
                            }
                            
                            # Upload inspection data
                            upload_data = {
                                'action': 'upload_inspection',
                                'inspection_id': data['inspection_id'],
                                'inspection_data': {k: v for k, v in data.items() if k != 'pdf_data'}
                            }
                            
                            response = requests.post(
                                self.server_url.get(),
                                json=upload_data,
                                headers=headers,
                                timeout=30
                            )
                            
                            if response.status_code == 200:
                                self.stats['uploaded'] += 1
                                self.update_stats()
                                self.update_status("upload", "success")
                                self.log(f"  ✓ Uploaded to server", "success")
                                
                                # Upload PDF if exists
                                if 'pdf_data' in data:
                                    pdf_upload = {
                                        'action': 'upload_pdf',
                                        'inspection_id': data['inspection_id'],
                                        'pdf_data': data['pdf_data']
                                    }
                                    requests.post(
                                        self.server_url.get(),
                                        json=pdf_upload,
                                        headers=headers,
                                        timeout=30
                                    )
                            else:
                                self.update_status("upload", "warning")
                                
                        except Exception as e:
                            self.update_status("upload", "warning")
                            self.log(f"  Upload failed: {str(e)}", "warning")
                    
                except Exception as e:
                    self.stats['failed'] += 1
                    self.update_stats()
                    self.log(f"  Error: {str(e)}", "error")
                
                time.sleep(1)
            
            # Complete
            self.log(f"\n{'='*50}", "info")
            self.log("COLLECTION COMPLETE!", "success")
            self.log(f"Total collected: {self.stats['collected']}/{self.stats['total']}", "success")
            self.log(f"PDFs downloaded: {self.stats['pdfs']}", "success")
            if can_upload:
                self.log(f"Uploaded to server: {self.stats['uploaded']}", "success")
            self.log(f"Data saved to: {os.path.abspath(data_dir)}", "info")
            
        except Exception as e:
            self.log(f"Collection error: {str(e)}", "error")
            
        finally:
            if self.driver:
                self.driver.quit()
            
            self.is_running = False
            self.root.after(0, self.collection_complete)
    
    def collection_complete(self):
        """Put the controls back into their idle state and show the run summary dialog."""
        self.progress_var.set(100)
        self.footer_label.config(text="Collection complete")
        self.stop_button.config(state='disabled')
        self.start_button.config(state='normal')

        # Show summary
        counts = self.stats
        report_lines = [
            "Collection Complete!",
            "",
            f"Inspections collected: {counts['collected']}",
            f"PDFs downloaded: {counts['pdfs']}",
            f"Uploaded to server: {counts['uploaded']}",
            f"Failed: {counts['failed']}",
        ]
        messagebox.showinfo("Complete", "\n".join(report_lines))
    
    def stop_collection(self):
        """Stop the collection process"""
        self.is_running = False
        self.log("Stopping collection...", "warning")
        
        if self.driver:
            try:
                self.driver.quit()
            except:
                pass
    
    def open_data_folder(self):
        """Open the data folder in Windows Explorer"""
        try:
            os.startfile(os.getcwd())
        except:
            pass
    
    def save_settings(self):
        """Write the current settings to ``sf_collector_settings.json``.

        The file is created in the current working directory and holds the
        server URL, the API key and the collect limit.  An unreadable limit
        field or a failed write is reported in an error dialog; on success a
        confirmation is logged and shown.

        NOTE(review): the API key is stored in plain text - confirm that is
        acceptable for the machines this runs on.
        """
        try:
            settings = {
                'server_url': self.server_url.get(),
                'api_key': self.api_key.get(),
                'collect_limit': self.collect_limit.get()
            }
        except (tk.TclError, ValueError):
            # Raised when the limit field does not hold a number
            messagebox.showerror("Settings",
                                 "Inspections to Collect must be a whole number (0 for all)")
            return

        try:
            with open('sf_collector_settings.json', 'w') as f:
                json.dump(settings, f, indent=2)
        except OSError as e:
            self.log(f"Could not save settings: {e}", "error")
            messagebox.showerror("Settings", f"Could not save settings:\n{e}")
            return

        self.log("Settings saved", "success")
        messagebox.showinfo("Settings", "Settings saved successfully!")
    
    def load_settings(self):
        """Load settings from file"""
        try:
            with open('sf_collector_settings.json', 'r') as f:
                settings = json.load(f)
                
            self.server_url.set(settings.get('server_url', self.server_url.get()))
            self.api_key.set(settings.get('api_key', self.api_key.get()))
            self.collect_limit.set(settings.get('collect_limit', self.collect_limit.get()))
            
            self.log("Settings loaded", "info")
        except:
            pass

def main():
    """Create the Tk root window, attach the collector GUI and run the event loop."""
    window = tk.Tk()
    gui = SFCollectorGUI(window)  # name kept so the GUI object stays referenced while the loop runs
    window.mainloop()


# Start the GUI only when this file is executed as a script
if __name__ == "__main__":
    main()