#!/home/chris/cleankitchens-env/bin/python3
"""
San Francisco Two-Phase Article Generation
Based on Chicago's two-phase system
Phase 1: Analyze inspection and extract metadata
Phase 2: Generate educational article with proper HTML
"""

import os
import json
import mysql.connector
import re
from datetime import datetime
from anthropic import Anthropic
from dotenv import load_dotenv

# Load environment variables
load_dotenv('/home/chris/.env')

class SFTwoPhaseGenerator:
    def __init__(self):
        """Open the MySQL connection, create the Anthropic client, load reference links.

        Database settings default to the local ``cleankitchens`` database as
        ``root`` with an empty password. Each one can be overridden with the
        ``CLEANKITCHENS_DB_HOST``, ``CLEANKITCHENS_DB_NAME``,
        ``CLEANKITCHENS_DB_USER`` and ``CLEANKITCHENS_DB_PASSWORD`` environment
        variables, so real credentials do not have to be written into this file.
        The Anthropic API key is read from ``ANTHROPIC_API_KEY``.
        """
        # Database connection (environment overrides, original values as defaults)
        self.conn = mysql.connector.connect(
            host=os.getenv('CLEANKITCHENS_DB_HOST', 'localhost'),
            database=os.getenv('CLEANKITCHENS_DB_NAME', 'cleankitchens'),
            user=os.getenv('CLEANKITCHENS_DB_USER', 'root'),
            password=os.getenv('CLEANKITCHENS_DB_PASSWORD', ''),
        )
        # dictionary=True makes every fetched row a dict keyed by column name.
        self.cursor = self.conn.cursor(dictionary=True)

        # Initialize Anthropic client
        self.anthropic = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))

        # Government references for SF, grouped by topic.
        # Each entry is a (display name, URL) tuple.
        self.gov_references = {
            'temperature': {
                'danger_zone': ('USDA Danger Zone', 'https://www.fsis.usda.gov/food-safety/safe-food-handling-and-preparation/food-safety-basics/danger-zone-40f-140f'),
                'cold_holding': ('FDA Cold Storage', 'https://www.fda.gov/food/buy-store-serve-safe-food/safe-food-storage'),
                'hot_holding': ('California Retail Food Code', 'https://www.cdph.ca.gov/Programs/CEH/DFDCS/CDPH%20Document%20Library/FDB/FoodSafetyProgram/RetailFood/CaliforniaRetailFoodCode.pdf')
            },
            'hygiene': {
                'handwashing': ('CDC Handwashing Guidelines', 'https://www.cdc.gov/handwashing/when-how-handwashing.html'),
                'employee_health': ('SF Employee Health Policy', 'https://www.sfdph.org/dph/EH/Food/default.asp')
            },
            'pests': {
                'rodents': ('CDC Rodent Control', 'https://www.cdc.gov/rodents/diseases/index.html'),
                'prevention': ('EPA IPM Principles', 'https://www.epa.gov/safepestcontrol/integrated-pest-management-ipm-principles')
            },
            'sanfrancisco': {
                'health_dept': ('SF Department of Public Health', 'https://www.sfdph.org/dph/EH/Food/Inspections.asp'),
                'regulations': ('SF Health Code Article 8', 'https://codelibrary.amlegal.com/codes/san_francisco/latest/sf_health/0-0-0-1727'),
                'inspection_data': ('DataSF Food Inspections', 'https://data.sfgov.org/Health-and-Social-Services/Restaurant-Scores-LIVES-Standard/pyih-qa8i')
            }
        }
    
    def get_recent_inspections(self, limit=10, status_filter=None):
        """Get recent inspections from database"""
        sql = """
        SELECT * FROM sf_temp 
        WHERE 1=1
        """
        params = []
        
        if status_filter:
            sql += " AND inspection_status = %s"
            params.append(status_filter)
        
        sql += " ORDER BY inspection_date DESC LIMIT %s"
        params.append(limit)
        
        self.cursor.execute(sql, params)
        return self.cursor.fetchall()
    
    def phase1_analyze_metadata(self, inspection):
        """Phase 1: Extract metadata and analyze inspection"""
        
        # Parse violations if stored as JSON string
        violations_text = inspection.get('violations', '[]')
        if isinstance(violations_text, str):
            try:
                violations_list = json.loads(violations_text)
            except:
                violations_list = []
        else:
            violations_list = violations_text
        
        # Format violations for prompt
        violations_formatted = ""
        for v in violations_list:
            if isinstance(v, dict):
                violations_formatted += f"- Code {v.get('code', '')}: {v.get('description', '')}\n"
        
        prompt = f"""Analyze this San Francisco restaurant inspection and extract structured metadata.

INSPECTION DATA:
Facility: {inspection.get('facility_name', 'Unknown')}
Address: {inspection.get('address', 'Unknown')}
Date: {inspection.get('inspection_date', 'Unknown')}
Inspector: {inspection.get('inspector_name', 'Unknown')}
Status: {inspection.get('inspection_status', 'Unknown')}
Violation Count: {inspection.get('violation_count', 0)}
Violations:
{violations_formatted if violations_formatted else 'None'}

Corrective Actions: {inspection.get('corrective_actions', 'None')}
Observations: {inspection.get('observations', 'None')}

Analyze and return ONLY a JSON object with these fields:
{{
  "cuisine_type": "Detect from name (e.g., Coffee Shop, Pizza, Chinese, etc.)",
  "neighborhood": "San Francisco neighborhood from address (e.g., Financial District, Mission, etc.)",
  "service_style": "Quick Service/Full Service/Takeout/Cafe/etc.",
  "price_range": "$, $$, $$$, or $$$$",
  "is_chain": true/false,
  "chain_name": "Parent company if chain, null if not",
  "violation_categories": ["temperature", "hygiene", "equipment", "pests", etc.],
  "violation_severity": "Critical/Major/Minor based on violations",
  "critical_violations": ["list specific critical issues"],
  "risk_level": "High/Medium/Low",
  "compliance_score": 0-100 based on violations,
  "key_concerns": ["specific food safety issues found"],
  "education_focus": "Main topic to educate about based on violations",
  "regulatory_references": ["relevant California food codes violated"]
}}

CRITICAL: Return ONLY valid JSON. No explanations or text outside the JSON structure."""

        try:
            response = self.anthropic.messages.create(
                model="claude-3-haiku-20240307",
                max_tokens=800,
                temperature=0.1,
                messages=[{"role": "user", "content": prompt}]
            )
            
            metadata = json.loads(response.content[0].text)
            return metadata
            
        except Exception as e:
            print(f"Phase 1 error: {e}")
            # Return default metadata
            return {
                "cuisine_type": "Restaurant",
                "neighborhood": "San Francisco",
                "violation_severity": "Major" if inspection.get('violation_count', 0) > 2 else "Minor",
                "education_focus": "Food safety compliance"
            }
    
    def phase2_generate_article(self, inspection, metadata):
        """Phase 2: Generate educational article"""
        
        # Determine article tone based on status
        status = inspection.get('inspection_status', 'Unknown')
        if status == 'CLOSURE':
            tone = "serious and educational, emphasizing the importance of food safety"
        elif status == 'CONDITIONAL PASS':
            tone = "informative and constructive, focusing on improvements needed"
        else:
            tone = "positive but educational, highlighting good practices"
        
        # Get relevant government references
        refs = []
        for category in metadata.get('violation_categories', []):
            if category in ['temperature', 'cold', 'hot']:
                refs.append(self.gov_references['temperature']['danger_zone'])
            elif category in ['hygiene', 'handwashing', 'employee']:
                refs.append(self.gov_references['hygiene']['handwashing'])
            elif category in ['pest', 'rodent', 'insect']:
                refs.append(self.gov_references['pests']['prevention'])
        
        prompt = f"""Create an educational news article about this San Francisco restaurant inspection.

INSPECTION SUMMARY:
Facility: {inspection.get('facility_name')}
Location: {inspection.get('address')}
Date: {inspection.get('inspection_date')}
Status: {status}
Violations: {inspection.get('violation_count', 0)}

METADATA:
{json.dumps(metadata, indent=2)}

KEY VIOLATIONS:
{inspection.get('corrective_actions', 'None noted')}

OBSERVATIONS:
{inspection.get('observations', 'None noted')}

Write an educational article with a {tone} that:
1. Reports the inspection results factually
2. Educates readers about the food safety issues found
3. Explains why these violations matter for public health
4. Provides context about SF food safety standards
5. Uses proper HTML formatting with <p>, <h2>, <ul>, <li> tags

Return ONLY a JSON object with this structure:
{{
  "title": "Engaging, SEO-friendly title mentioning the restaurant and SF",
  "content": "Full HTML article content (minimum 400 words) with proper paragraph tags",
  "excerpt": "2-3 sentence summary for preview",
  "meta_description": "SEO description under 160 characters",
  "tags": ["san-francisco", "food-safety", "restaurant-inspection", "specific-neighborhood", etc.],
  "education_summary": "Brief summary of key food safety lessons",
  "severity_label": "routine/concerning/critical based on violations"
}}

CRITICAL: Return ONLY valid JSON with proper HTML in the content field."""

        try:
            response = self.anthropic.messages.create(
                model="claude-3-sonnet-20240229",
                max_tokens=2000,
                temperature=0.3,
                messages=[{"role": "user", "content": prompt}]
            )
            
            article = json.loads(response.content[0].text)
            
            # Add inspection data to article
            article['inspection_id'] = inspection.get('inspection_id')
            article['facility_name'] = inspection.get('facility_name')
            article['inspection_date'] = str(inspection.get('inspection_date'))
            article['inspection_status'] = status
            article['violation_count'] = inspection.get('violation_count', 0)
            
            return article
            
        except Exception as e:
            print(f"Phase 2 error: {e}")
            return None
    
    def generate_articles_batch(self, limit=5, status_filter=None):
        """Generate articles for recent inspections"""
        inspections = self.get_recent_inspections(limit, status_filter)
        
        articles = []
        for inspection in inspections:
            print(f"\nProcessing: {inspection['facility_name'][:50]}...")
            
            # Phase 1: Analyze
            print("  Phase 1: Analyzing metadata...")
            metadata = self.phase1_analyze_metadata(inspection)
            
            # Phase 2: Generate article
            print("  Phase 2: Generating article...")
            article = self.phase2_generate_article(inspection, metadata)
            
            if article:
                articles.append(article)
                print(f"  ✓ Article generated: {article['title'][:60]}...")
                
                # Save article to file
                self.save_article(article)
            else:
                print("  ✗ Article generation failed")
        
        return articles
    
    def save_article(self, article):
        """Save article to JSON file"""
        output_dir = "/var/www/twin-digital-media/public_html/_sites/cleankitchens/data/sf/articles"
        os.makedirs(output_dir, exist_ok=True)
        
        filename = f"{article['inspection_id']}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        filepath = os.path.join(output_dir, filename)
        
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(article, f, indent=2, ensure_ascii=False)
        
        print(f"  💾 Saved to: {filename}")
    
    def test_generation(self):
        """Test with one inspection of each status type"""
        print("="*60)
        print("SF TWO-PHASE ARTICLE GENERATION TEST")
        print("="*60)
        
        # Test each status type
        for status in ['CLOSURE', 'CONDITIONAL PASS', 'PASS']:
            print(f"\n🔍 Testing {status} inspection...")
            articles = self.generate_articles_batch(limit=1, status_filter=status)
            
            if articles:
                print(f"✅ Successfully generated article for {status}")
            else:
                print(f"⚠️  No {status} inspections found")
        
        print("\n" + "="*60)
        print("TEST COMPLETE")
        print("="*60)

if __name__ == "__main__":
    # Script entry point: build a generator and run the per-status smoke test.
    SFTwoPhaseGenerator().test_generation()

    # Or generate batch
    # SFTwoPhaseGenerator().generate_articles_batch(limit=10)