<?php
/**
 * CleanKitchens Sitemap Generation System
 * Automatic age-based sitemap generation from Weaviate
 */

require_once __DIR__ . '/../includes/config.php';
require_once __DIR__ . '/../functions_live.php';

/**
 * Builds the XML sitemaps for the site from article records fetched through
 * queryWeaviate() and writes them into a single output directory.
 *
 * Files produced:
 *   - sitemap_news.xml             articles newer than 48 hours (Google News markup)
 *   - sitemap_recent.xml           articles between 30 and 2 days old
 *   - sitemap_archive_YYYY_MM.xml  articles at least 30 days old, one file per month
 *   - sitemap_pages.xml            fixed list of static pages
 *   - sitemap.xml                  index of whichever of the above files exist
 *
 * Every generate*() method echoes a progress line and returns true on success
 * or false when the query or the file write failed.
 */
class SitemapGenerator {
    /** @var string Site origin without trailing slash; prefixed to every <loc>. */
    private $baseUrl = 'https://cleankitchens.org';

    /** @var string Output directory, always stored with a trailing slash. */
    private $sitemapDir;

    /**
     * @param string|null $sitemapDir Directory to write sitemap files to.
     *                                Defaults to the production web root.
     * @param string|null $baseUrl    Site origin used in <loc> values.
     *                                Defaults to https://cleankitchens.org.
     */
    public function __construct($sitemapDir = null, $baseUrl = null) {
        // Default to the web root so the generated files are publicly reachable
        if ($sitemapDir === null) {
            $sitemapDir = '/var/www/twin-digital-media/public_html/_sites/cleankitchens/';
        }
        $this->sitemapDir = rtrim($sitemapDir, '/') . '/';

        if ($baseUrl !== null) {
            $this->baseUrl = rtrim($baseUrl, '/');
        }
    }

    /**
     * Generate news sitemap for articles less than 48 hours old
     * For Google News inclusion
     *
     * @return bool True when the file was written, false on query or write failure.
     */
    public function generateNewsSitemap() {
        $cutoffDate = date('c', strtotime('-48 hours'));

        $query = '{
            Get {
                Articles(
                    where: {
                        path: ["published_date"]
                        operator: GreaterThan
                        valueDate: "' . $cutoffDate . '"
                    }
                    sort: [{path: ["published_date"], order: desc}]
                    limit: 200
                ) {
                    slug
                    title
                    published_date
                    city
                    state
                }
            }
        }';

        $articles = $this->fetchArticles($query);
        if ($articles === null) {
            return false;
        }

        $xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
        $xml .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"' . "\n";
        $xml .= '        xmlns:news="http://www.google.com/schemas/sitemap-news/0.9">' . "\n";

        $written = 0;
        foreach ($this->uniqueBySlug($articles) as $article) {
            $published = $this->formatDate(isset($article['published_date']) ? $article['published_date'] : null);
            if ($published === null) {
                // A news entry is meaningless without its publication date; skip
                // the record rather than emit a bogus 1970 timestamp.
                continue;
            }

            $title = isset($article['title']) ? (string) $article['title'] : '';

            $news  = '    <news:news>' . "\n";
            $news .= '      <news:publication>' . "\n";
            $news .= '        <news:name>CleanKitchens</news:name>' . "\n";
            $news .= '        <news:language>en</news:language>' . "\n";
            $news .= '      </news:publication>' . "\n";
            $news .= '      <news:publication_date>' . $published . '</news:publication_date>' . "\n";
            $news .= '      <news:title>' . $this->xmlEscape($title) . '</news:title>' . "\n";
            $news .= '    </news:news>' . "\n";

            $xml .= $this->buildUrlEntry('/' . $article['slug'], $published, 'hourly', '1.0', $news);
            $written++;
        }

        $xml .= '</urlset>';

        if (!$this->writeSitemap('sitemap_news.xml', $xml)) {
            return false;
        }
        echo "Generated news sitemap with " . $written . " articles\n";

        return true;
    }

    /**
     * Generate recent sitemap for articles 2-30 days old
     *
     * @return bool True when the file was written, false on query or write failure.
     */
    public function generateRecentSitemap() {
        $startDate = date('c', strtotime('-30 days'));
        $endDate = date('c', strtotime('-2 days'));

        $query = '{
            Get {
                Articles(
                    where: {
                        operator: And
                        operands: [
                            {
                                path: ["published_date"]
                                operator: GreaterThan
                                valueDate: "' . $startDate . '"
                            }
                            {
                                path: ["published_date"]
                                operator: LessThan
                                valueDate: "' . $endDate . '"
                            }
                        ]
                    }
                    sort: [{path: ["published_date"], order: desc}]
                    limit: 2000
                ) {
                    slug
                    published_date
                }
            }
        }';

        $articles = $this->fetchArticles($query);
        if ($articles === null) {
            return false;
        }

        $uniqueArticles = $this->uniqueBySlug($articles);

        if (!$this->writeSitemap('sitemap_recent.xml', $this->buildArticleUrlset($uniqueArticles, 'daily', '0.8'))) {
            return false;
        }
        // Report what was actually written, i.e. the count after de-duplication
        echo "Generated recent sitemap with " . count($uniqueArticles) . " articles\n";

        return true;
    }

    /**
     * Generate archive sitemaps for articles 30+ days old (by month)
     *
     * Walks month by month (in UTC) from the month of the oldest article up to
     * the 30-day cutoff. The final month is clamped to the cutoff so that
     * articles younger than 30 days are not listed here.
     *
     * @return bool False if the oldest-article lookup fails or any month could
     *              not be queried or written; true otherwise.
     */
    public function generateArchiveSitemaps() {
        // Get oldest article to determine starting point
        $query = '{
            Get {
                Articles(
                    sort: [{path: ["published_date"], order: asc}]
                    limit: 1
                ) {
                    published_date
                }
            }
        }';

        $oldest = $this->fetchArticles($query);
        if ($oldest === null || !isset($oldest[0]['published_date'])) {
            return false;
        }

        try {
            $oldestDate = new DateTime($oldest[0]['published_date']);
        } catch (Exception $e) {
            // Malformed date in the store; nothing sensible to iterate from
            error_log('SitemapGenerator: unparsable oldest published_date: ' . $e->getMessage());
            return false;
        }

        // All boundaries are formatted with a literal "Z", so they must really be UTC
        $utc = new DateTimeZone('UTC');

        $cutoffDate = new DateTime('-30 days');
        $cutoffDate->setTimezone($utc);

        $monthStart = clone $oldestDate;
        $monthStart->setTimezone($utc);
        $monthStart->modify('first day of this month');
        // Drop the article's time-of-day so each window starts at the month boundary
        $monthStart->setTime(0, 0, 0);

        $generatedCount = 0;
        $allOk = true;

        while ($monthStart < $cutoffDate) {
            $nextMonth = clone $monthStart;
            $nextMonth->modify('first day of next month');

            // Half-open window [monthStart, rangeEnd); never reaches past the cutoff
            $rangeEnd = ($nextMonth < $cutoffDate) ? $nextMonth : $cutoffDate;

            $startDate = $monthStart->format('Y-m-d\TH:i:s\Z');
            $endDate = $rangeEnd->format('Y-m-d\TH:i:s\Z');

            $query = '{
                Get {
                    Articles(
                        where: {
                            operator: And
                            operands: [
                                {
                                    path: ["published_date"]
                                    operator: GreaterThanEqual
                                    valueDate: "' . $startDate . '"
                                }
                                {
                                    path: ["published_date"]
                                    operator: LessThan
                                    valueDate: "' . $endDate . '"
                                }
                            ]
                        }
                        sort: [{path: ["published_date"], order: desc}]
                        limit: 5000
                    ) {
                        slug
                        published_date
                    }
                }
            }';

            $articles = $this->fetchArticles($query);

            if ($articles === null) {
                // Keep going so one bad month does not block the rest
                $allOk = false;
            } else {
                $uniqueArticles = $this->uniqueBySlug($articles);

                // Months without articles get no file, as before
                if (!empty($uniqueArticles)) {
                    $filename = sprintf('sitemap_archive_%s_%s.xml', $monthStart->format('Y'), $monthStart->format('m'));

                    if ($this->writeSitemap($filename, $this->buildArticleUrlset($uniqueArticles, 'monthly', '0.5'))) {
                        echo "Generated archive sitemap $filename with " . count($uniqueArticles) . " articles\n";
                        $generatedCount++;
                    } else {
                        $allOk = false;
                    }
                }
            }

            // Move to next month
            $monthStart = $nextMonth;
        }

        echo "Generated $generatedCount archive sitemaps\n";
        return $allOk;
    }

    /**
     * Generate sitemap index listing all sitemaps
     *
     * Only files that currently exist in the output directory are listed, in
     * the order: news, recent, archives (sorted by filename), static pages.
     *
     * @return bool True when sitemap.xml was written, false otherwise.
     */
    public function generateSitemapIndex() {
        // Files may have been rewritten earlier in this process; read fresh stats
        clearstatcache();

        $files = [];

        foreach (['sitemap_news.xml', 'sitemap_recent.xml'] as $name) {
            if (file_exists($this->sitemapDir . $name)) {
                $files[] = $this->sitemapDir . $name;
            }
        }

        $archiveFiles = glob($this->sitemapDir . 'sitemap_archive_*.xml');
        if ($archiveFiles) {
            // Names embed YYYY_MM, so a plain sort is chronological
            sort($archiveFiles);
            foreach ($archiveFiles as $file) {
                $files[] = $file;
            }
        }

        if (file_exists($this->sitemapDir . 'sitemap_pages.xml')) {
            $files[] = $this->sitemapDir . 'sitemap_pages.xml';
        }

        $xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
        $xml .= '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

        foreach ($files as $file) {
            $xml .= '  <sitemap>' . "\n";
            $xml .= '    <loc>' . $this->baseUrl . '/' . $this->xmlEscape(basename($file)) . '</loc>' . "\n";

            $modified = filemtime($file);
            if ($modified !== false) {
                $xml .= '    <lastmod>' . date('c', $modified) . '</lastmod>' . "\n";
            }

            $xml .= '  </sitemap>' . "\n";
        }

        $xml .= '</sitemapindex>';

        if (!$this->writeSitemap('sitemap.xml', $xml)) {
            return false;
        }
        echo "Generated sitemap index\n";

        return true;
    }

    /**
     * Generate static pages sitemap
     *
     * @return bool True when the file was written, false otherwise.
     */
    public function generatePagesSitemap() {
        $pages = [
            ['loc' => '/', 'changefreq' => 'hourly', 'priority' => '1.0'],
            ['loc' => '/news', 'changefreq' => 'hourly', 'priority' => '0.9'],
            ['loc' => '/about', 'changefreq' => 'monthly', 'priority' => '0.7'],
            ['loc' => '/methodology', 'changefreq' => 'monthly', 'priority' => '0.6'],
            ['loc' => '/editorial-policy', 'changefreq' => 'monthly', 'priority' => '0.6'],
            ['loc' => '/privacy', 'changefreq' => 'yearly', 'priority' => '0.5'],
            ['loc' => '/terms', 'changefreq' => 'yearly', 'priority' => '0.5'],
            ['loc' => '/disclaimer', 'changefreq' => 'monthly', 'priority' => '0.6'],
            ['loc' => '/corrections', 'changefreq' => 'weekly', 'priority' => '0.6'],
            ['loc' => '/contact', 'changefreq' => 'monthly', 'priority' => '0.5'],
            ['loc' => '/data-sources.php', 'changefreq' => 'monthly', 'priority' => '0.6'],
        ];

        // One timestamp for the whole file instead of a date() call per page
        $now = date('c');

        $xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
        $xml .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

        foreach ($pages as $page) {
            $xml .= $this->buildUrlEntry($page['loc'], $now, $page['changefreq'], $page['priority']);
        }

        $xml .= '</urlset>';

        if (!$this->writeSitemap('sitemap_pages.xml', $xml)) {
            return false;
        }
        echo "Generated pages sitemap\n";

        return true;
    }

    /**
     * Run all sitemap generations
     *
     * Every step runs even if an earlier one failed, so the index always
     * reflects whatever files are on disk afterwards.
     *
     * @return bool True only if every step succeeded.
     */
    public function generateAll() {
        echo "Starting sitemap generation...\n";
        echo "================================\n";

        $results = [
            $this->generateNewsSitemap(),
            $this->generateRecentSitemap(),
            $this->generateArchiveSitemaps(),
            $this->generatePagesSitemap(),
            // Index last: it lists the files the steps above just produced
            $this->generateSitemapIndex(),
        ];

        $allOk = !in_array(false, $results, true);

        echo "================================\n";
        echo $allOk ? "Sitemap generation complete!\n" : "Sitemap generation completed with errors\n";

        return $allOk;
    }

    /**
     * Run a GraphQL query through queryWeaviate() and pull out the Articles list.
     *
     * queryWeaviate() is defined outside this file; this assumes it returns the
     * decoded response as a nested array, or a falsy value on failure.
     *
     * @param string $query GraphQL query text.
     * @return array|null List of article rows, or null when the response is
     *                    missing or does not contain data.Get.Articles.
     */
    private function fetchArticles($query) {
        $response = queryWeaviate($query);

        if (!$response
            || !isset($response['data']['Get']['Articles'])
            || !is_array($response['data']['Get']['Articles'])) {
            return null;
        }

        return $response['data']['Get']['Articles'];
    }

    /**
     * Keep the first row for each slug and drop rows that have no usable slug.
     *
     * @param array $articles Article rows as returned by fetchArticles().
     * @return array Rows in their original order with unique, non-empty slugs.
     */
    private function uniqueBySlug($articles) {
        $unique = [];
        $seen = [];

        foreach ($articles as $article) {
            if (!is_array($article) || !isset($article['slug']) || (string) $article['slug'] === '') {
                continue;
            }

            $slug = (string) $article['slug'];
            // Keyed lookup instead of in_array(): constant time per row
            if (isset($seen[$slug])) {
                continue;
            }

            $seen[$slug] = true;
            $unique[] = $article;
        }

        return $unique;
    }

    /**
     * Build a complete plain <urlset> document for a list of article rows.
     *
     * @param array  $articles   Rows with 'slug' and optionally 'published_date'.
     * @param string $changefreq Value for every <changefreq>.
     * @param string $priority   Value for every <priority>.
     * @return string XML document.
     */
    private function buildArticleUrlset($articles, $changefreq, $priority) {
        $xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
        $xml .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

        foreach ($articles as $article) {
            $lastmod = $this->formatDate(isset($article['published_date']) ? $article['published_date'] : null);
            $xml .= $this->buildUrlEntry('/' . $article['slug'], $lastmod, $changefreq, $priority);
        }

        $xml .= '</urlset>';

        return $xml;
    }

    /**
     * Render one <url> element.
     *
     * @param string      $path       Site-relative path starting with "/".
     * @param string|null $lastmod    Formatted date, or null to omit <lastmod>.
     * @param string      $changefreq Value for <changefreq>.
     * @param string      $priority   Value for <priority>.
     * @param string      $extraXml   Pre-built, already-escaped child XML placed
     *                                directly after <loc> (used for news markup).
     * @return string
     */
    private function buildUrlEntry($path, $lastmod, $changefreq, $priority, $extraXml = '') {
        $xml = '  <url>' . "\n";
        $xml .= '    <loc>' . $this->baseUrl . $this->xmlEscape($path) . '</loc>' . "\n";
        $xml .= $extraXml;
        if ($lastmod !== null) {
            $xml .= '    <lastmod>' . $lastmod . '</lastmod>' . "\n";
        }
        $xml .= '    <changefreq>' . $changefreq . '</changefreq>' . "\n";
        $xml .= '    <priority>' . $priority . '</priority>' . "\n";
        $xml .= '  </url>' . "\n";

        return $xml;
    }

    /**
     * Convert a stored date string to ISO 8601 (date('c')) form.
     *
     * @param mixed $value Raw date value from an article row.
     * @return string|null Formatted date, or null if the value cannot be parsed.
     */
    private function formatDate($value) {
        if (!is_string($value) || $value === '') {
            return null;
        }

        $timestamp = strtotime($value);

        return ($timestamp === false) ? null : date('c', $timestamp);
    }

    /**
     * Escape text for use inside an XML element.
     *
     * @param string $text
     * @return string
     */
    private function xmlEscape($text) {
        return htmlspecialchars((string) $text, ENT_XML1 | ENT_QUOTES, 'UTF-8');
    }

    /**
     * Write a sitemap file into the output directory.
     *
     * @param string $filename File name relative to the output directory.
     * @param string $xml      Full document contents.
     * @return bool False (and an error_log entry) when the write fails.
     */
    private function writeSitemap($filename, $xml) {
        $path = $this->sitemapDir . $filename;

        // LOCK_EX keeps two overlapping runs from interleaving their output
        if (file_put_contents($path, $xml, LOCK_EX) === false) {
            error_log('SitemapGenerator: failed to write ' . $path);
            return false;
        }

        return true;
    }
}

// CLI entry point: runs whenever this file is loaded under the CLI SAPI.
// An optional first argument picks a single sitemap; anything else rebuilds all.
if (php_sapi_name() === 'cli') {
    $generator = new SitemapGenerator();

    // Sub-command name => generator method to invoke
    $commands = [
        'news'    => 'generateNewsSitemap',
        'recent'  => 'generateRecentSitemap',
        'archive' => 'generateArchiveSitemaps',
        'index'   => 'generateSitemapIndex',
        'pages'   => 'generatePagesSitemap',
    ];

    // No argument, or an unrecognised one, falls back to the full run
    $method = 'generateAll';
    if ($argc > 1 && isset($commands[$argv[1]])) {
        $method = $commands[$argv[1]];
    }

    $generator->$method();
}
?>