#!/usr/bin/env python3
"""
Step 1: Test accessing SF inspection site with real browser via Xvfb
This script simply tries to load the homepage and capture what we see
"""

import time
import subprocess
import os
import signal
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import logging

# Configure root logging when the module is loaded: INFO level and a
# "timestamp - level - message" line format.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# Module-level logger shared by the class and functions in this file.
logger = logging.getLogger(__name__)

class XvfbBrowser:
    """Run a real (non-headless) Chrome inside an Xvfb virtual display.

    Intended call order: start_xvfb() -> setup_browser() -> test_access()
    -> cleanup(). Every step reports failure by returning False rather
    than raising, and cleanup() is safe to call at any point.
    """

    # Page probed by test_access() and the artifacts it writes.
    TARGET_URL = "https://inspections.myhealthdepartment.com/san-francisco"
    SCREENSHOT_PATH = "/tmp/sf_xvfb_screenshot.png"
    PAGE_SOURCE_PATH = "/tmp/sf_xvfb_page.html"

    def __init__(self):
        self.xvfb_proc = None  # Popen handle of the Xvfb server we launched
        self.display = ":99"   # X display the virtual server listens on
        self.driver = None     # Selenium Chrome WebDriver, once started

    def start_xvfb(self):
        """Start the Xvfb virtual display.

        Returns:
            True if Xvfb is still running after the start-up grace period,
            False if it could not be launched or exited immediately.
        """
        try:
            # Kill any stale Xvfb left on our display by a previous run
            subprocess.run(
                ["pkill", "-f", f"Xvfb {self.display}"],
                capture_output=True,
            )
            time.sleep(1)

            # Start Xvfb with specific display
            logger.info("Starting Xvfb virtual display...")
            self.xvfb_proc = subprocess.Popen([
                "Xvfb",
                self.display,
                "-screen", "0", "1920x1080x24",
                "-ac",  # Disable access control
                "+extension", "GLX",
                "+render",
                "-noreset"
            ])

            time.sleep(2)  # Give Xvfb time to start

            # Popen succeeds even if Xvfb dies right away (display already
            # in use, unsupported flag, ...), so confirm it is still alive
            # before claiming success.
            exit_code = self.xvfb_proc.poll()
            if exit_code is not None:
                logger.error(f"Xvfb exited immediately with code {exit_code}")
                self.xvfb_proc = None
                return False

            # Only point child processes at the display once it exists
            os.environ["DISPLAY"] = self.display

            logger.info(f"Xvfb started on display {self.display}")
            return True

        except Exception as e:
            logger.error(f"Failed to start Xvfb: {e}")
            return False

    def setup_browser(self):
        """Setup Chrome with minimal options - act like a real browser.

        Returns:
            True if the WebDriver was created and configured, else False.
        """
        try:
            chrome_options = Options()

            # Minimal options - we want to appear as real as possible
            chrome_options.add_argument("--no-sandbox")  # Required for root
            chrome_options.add_argument("--disable-dev-shm-usage")  # Overcome limited resource problems

            # Set window size
            chrome_options.add_argument("--window-size=1920,1080")
            chrome_options.add_argument("--start-maximized")

            # Deliberately NOT headless: Xvfb provides the display instead

            # Create driver
            logger.info("Starting Chrome browser...")
            self.driver = webdriver.Chrome(options=chrome_options)

            # Set realistic timeouts
            self.driver.set_page_load_timeout(30)
            self.driver.implicitly_wait(10)

            logger.info("Chrome browser started successfully")
            return True

        except Exception as e:
            logger.error(f"Failed to setup browser: {e}")
            return False

    def test_access(self):
        """Try to access the SF inspection site.

        Loads the page, saves a screenshot and the page source, then
        checks for signs of a 403 block.

        Returns:
            True if the page loaded without a 403 indication; False if it
            looks blocked or any error occurred while loading/saving.
        """
        try:
            url = self.TARGET_URL
            logger.info(f"Attempting to access: {url}")

            # Navigate to the page
            self.driver.get(url)

            # Wait a bit like a human would
            logger.info("Waiting for page to load...")
            time.sleep(5)

            # Get page info
            current_url = self.driver.current_url
            page_title = self.driver.title

            logger.info(f"Current URL: {current_url}")
            logger.info(f"Page Title: {page_title}")

            # Take screenshot
            self.driver.save_screenshot(self.SCREENSHOT_PATH)
            logger.info(f"Screenshot saved to: {self.SCREENSHOT_PATH}")

            # Save page source. Explicit UTF-8 so non-ASCII markup does not
            # raise UnicodeEncodeError under a C/POSIX locale.
            page_source = self.driver.page_source
            with open(self.PAGE_SOURCE_PATH, "w", encoding="utf-8") as f:
                f.write(page_source)
            logger.info(f"Page source saved to: {self.PAGE_SOURCE_PATH}")

            # Check if we got blocked
            if "403" in page_title or "Forbidden" in page_source[:500]:
                logger.warning("⚠️  BLOCKED: Got 403 Forbidden")
                return False

            logger.info("✅ SUCCESS: Page loaded without 403!")
            self._log_page_summary()
            return True

        except Exception as e:
            logger.error(f"Error accessing site: {e}")
            return False

    def _log_page_summary(self):
        """Log a few links and keyword hits from the loaded page (best effort)."""
        try:
            # Look for any links or text
            links = self.driver.find_elements(By.TAG_NAME, "a")
            logger.info(f"Found {len(links)} links on page")

            # Show first few links
            for i, link in enumerate(links[:5]):
                href = link.get_attribute("href")
                text = link.text
                if href or text:
                    logger.info(f"  Link {i+1}: {text[:50]} -> {href}")

            # Look for inspection-related content
            page_text = self.driver.find_element(By.TAG_NAME, "body").text.lower()
            for keyword in ("inspection", "restaurant"):
                if keyword in page_text:
                    logger.info(f"Found '{keyword}' keyword in page content")

        except Exception as e:
            # Examining content is informational only; never fail the test
            logger.warning(f"Error examining page content: {e}")

    def cleanup(self):
        """Clean up browser and Xvfb.

        The two resources are released independently, so a failing browser
        shutdown cannot leave our Xvfb process running. Never raises, and
        may be called more than once.
        """
        if self.driver:
            logger.info("Closing browser...")
            try:
                self.driver.quit()
            except Exception as e:
                logger.error(f"Error closing browser: {e}")
            finally:
                self.driver = None

        if self.xvfb_proc:
            logger.info("Stopping Xvfb...")
            try:
                self.xvfb_proc.terminate()
                try:
                    self.xvfb_proc.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    # Escalate on OUR process only; a blanket
                    # `pkill -f Xvfb` would also kill unrelated displays.
                    logger.warning("Xvfb did not exit after SIGTERM; killing it")
                    self.xvfb_proc.kill()
                    self.xvfb_proc.wait(timeout=5)
            except Exception as e:
                logger.error(f"Error during cleanup: {e}")
            finally:
                self.xvfb_proc = None

def main():
    """Run the end-to-end access test and print a human-readable verdict.

    Starts Xvfb, launches Chrome, probes the site, and prints either the
    success summary or the blocked summary. The browser and the virtual
    display are always cleaned up, including on early exit.
    """
    rule = "=" * 60
    for banner_line in (
        rule,
        "SF INSPECTION SITE ACCESS TEST",
        "Using Xvfb + Real Chrome Browser",
        rule,
    ):
        logger.info(banner_line)

    session = XvfbBrowser()

    try:
        # Each prerequisite must succeed before the next one is attempted.
        prerequisites = (
            (session.start_xvfb, "Failed to start Xvfb"),
            (session.setup_browser, "Failed to setup browser"),
        )
        for step, failure_message in prerequisites:
            if not step():
                logger.error(failure_message)
                return

        reached_site = session.test_access()

        if reached_site:
            verdict = [
                "✅ SUCCESS: Able to access the site!",
                "Check /tmp/sf_xvfb_screenshot.png to see what the page looks like",
                "Check /tmp/sf_xvfb_page.html for the HTML content",
                "\nNext steps:",
                "1. Analyze the page structure",
                "2. Find inspection links",
                "3. Click on inspections and extract data",
            ]
        else:
            verdict = [
                "❌ BLOCKED: Still getting 403 Forbidden",
                "The site is detecting automated access even with Xvfb",
                "\nPossible solutions:",
                "1. Add more human-like behavior (mouse movements, delays)",
                "2. Use a browser profile with cookies",
                "3. Try Firefox instead of Chrome",
                "4. Use a proxy or VPN",
            ]

        # Results header followed by the verdict, one print per line.
        for line in ["\n" + rule, "TEST RESULTS", rule] + verdict:
            print(line)

    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        import traceback
        traceback.print_exc()

    finally:
        # Runs on normal completion, early return, and unexpected errors.
        session.cleanup()

    logger.info("Test complete")

# Run the access test only when this file is executed as a script.
if __name__ == "__main__":
    main()