<?php
/**
 * Keyword Matcher Class
 * Handles matching keywords in content with available internal links
 */

// Stop right away when the ABSPATH constant is not defined, i.e. the file was not loaded by code that defines it.
defined('ABSPATH') || exit;

class SEOINUX_Keyword_Matcher {

    /**
     * Find keyword matches in content
     *
     * Strips the markup from $content, lowercases it and counts, for every keyword of
     * every candidate link, how often the keyword occurs as a substring. Links that
     * resolve to the excluded post, or whose URL is already linked in the content,
     * are skipped.
     *
     * @param string $content Content to search in (may contain HTML)
     * @param array $links Array of links; each entry needs 'url' and 'keywords', 'title' is optional
     * @param int $exclude_post_id Post ID to exclude from suggestions (numeric strings are accepted)
     * @return array Matched keywords ('keyword', 'url', 'title', 'score'), highest score first
     */
    public function find_matches($content, $links, $exclude_post_id = 0) {
        $matches = array();

        if (empty($links) || !is_array($links)) {
            return $matches;
        }

        $content = (string) $content;

        // url_to_postid() returns an int, so the ID must be an int for the strict comparison below.
        $exclude_post_id = (int) $exclude_post_id;

        // Remove HTML tags but keep the text
        $plain_content = $this->to_lower(wp_strip_all_tags($content));

        if ($plain_content === '') {
            return $matches;
        }

        // Build a lookup of URLs that are already linked to avoid duplicate suggestions
        $existing_links = array();
        foreach ($this->extract_existing_links($content) as $existing_url) {
            $existing_links[$this->normalize_url($existing_url)] = true;
        }

        foreach ($links as $link) {
            // Entries without a URL or keywords cannot produce a suggestion
            if (!is_array($link) || !isset($link['url'], $link['keywords'])) {
                continue;
            }

            $url = $link['url'];
            $title = isset($link['title']) ? $link['title'] : '';

            // Skip if this is the current post
            if ($exclude_post_id > 0 && (int) url_to_postid($url) === $exclude_post_id) {
                continue;
            }

            // Skip if link is already in the content
            if (isset($existing_links[$this->normalize_url($url)])) {
                continue;
            }

            foreach ($this->parse_keywords($link['keywords']) as $keyword) {
                // Relevance score is the number of (case-insensitive) occurrences;
                // zero means the keyword does not appear at all.
                $frequency = substr_count($plain_content, $this->to_lower($keyword));

                if ($frequency > 0) {
                    $matches[] = array(
                        'keyword' => $keyword,
                        'url' => $url,
                        'title' => $title,
                        'score' => $frequency
                    );
                }
            }
        }

        // Sort by score (frequency) descending
        usort($matches, function($a, $b) {
            return $b['score'] - $a['score'];
        });

        return $matches;
    }

    /**
     * Parse keywords into a clean list
     *
     * Only empty strings are discarded, so a keyword such as "0" survives.
     *
     * @param string|array $keywords Comma-separated keywords, or an already split array
     * @return array Sequential array of trimmed, non-empty keywords
     */
    private function parse_keywords($keywords) {
        if (is_array($keywords)) {
            $parts = $keywords;
        } elseif (is_string($keywords) || is_numeric($keywords)) {
            $parts = explode(',', (string) $keywords);
        } else {
            return array();
        }

        $keyword_array = array();

        foreach ($parts as $part) {
            if (!is_scalar($part)) {
                continue;
            }

            $part = trim((string) $part);

            if ($part !== '') {
                $keyword_array[] = $part;
            }
        }

        return $keyword_array;
    }

    /**
     * Extract existing links from content
     *
     * Only quoted href attributes of real <a> tags are considered; look-alikes such as
     * data-href or other tags starting with "a" are ignored. HTML entities in the
     * attribute value (e.g. &amp;) are decoded so the result is comparable to a plain URL.
     *
     * @param string $content HTML content
     * @return array Array of unique URLs already present in content
     */
    private function extract_existing_links($content) {
        if (!is_string($content) || $content === '') {
            return array();
        }

        // The lookbehind rejects attribute names that merely end in "href" (data-href, xlink-href).
        $found = preg_match_all('/<a\s[^>]*?(?<![\w-])href\s*=\s*([\'"])(.*?)\1/i', $content, $matches);

        // false signals a PCRE failure, 0 means there are no links
        if (!$found) {
            return array();
        }

        $existing_links = array();

        foreach ($matches[2] as $href) {
            $href = trim(html_entity_decode($href, ENT_QUOTES, 'UTF-8'));

            if ($href !== '') {
                $existing_links[] = $href;
            }
        }

        return array_values(array_unique($existing_links));
    }

    /**
     * Find best keyword match for a specific text selection
     *
     * An exact (case-insensitive) keyword match wins immediately. Otherwise the keyword
     * that contains, or is contained in, the selection and shares the most characters
     * with it (similar_text(), byte based) is returned.
     *
     * @param string $selected_text Text selected by user
     * @param array $links Array of links with keywords
     * @return array|null Best matching link or null
     */
    public function find_best_match_for_text($selected_text, $links) {
        $selected_text_lower = $this->to_lower(trim((string) $selected_text));

        // An empty selection cannot match anything; it would also reach strpos() as an
        // empty needle, which raises a warning before PHP 8.
        if ($selected_text_lower === '' || empty($links) || !is_array($links)) {
            return null;
        }

        $best_match = null;
        $highest_score = 0;

        foreach ($links as $link) {
            if (!is_array($link) || !isset($link['url'], $link['keywords'])) {
                continue;
            }

            $title = isset($link['title']) ? $link['title'] : '';

            foreach ($this->parse_keywords($link['keywords']) as $keyword) {
                $keyword_lower = $this->to_lower($keyword);

                // Exact match
                if ($keyword_lower === $selected_text_lower) {
                    return array(
                        'keyword' => $keyword,
                        'url' => $link['url'],
                        'title' => $title,
                        'match_type' => 'exact'
                    );
                }

                // Partial match: one string has to contain the other
                if (strpos($selected_text_lower, $keyword_lower) === false &&
                    strpos($keyword_lower, $selected_text_lower) === false) {
                    continue;
                }

                $score = similar_text($keyword_lower, $selected_text_lower);

                if ($score > $highest_score) {
                    $highest_score = $score;
                    $best_match = array(
                        'keyword' => $keyword,
                        'url' => $link['url'],
                        'title' => $title,
                        'match_type' => 'partial',
                        'score' => $score
                    );
                }
            }
        }

        return $best_match;
    }

    /**
     * Generate keyword suggestions from content
     *
     * Returns the most frequent words of the content that are longer than three
     * characters and are not common English stop words.
     *
     * @param string $content Content to analyze (may contain HTML)
     * @param int $max_suggestions Maximum number of suggestions; values below 1 yield no suggestions
     * @return array Array of suggested keywords, most frequent first
     */
    public function suggest_keywords_from_content($content, $max_suggestions = 10) {
        // Stop words are stored as array keys so each lookup is a hash access
        static $stop_words = null;

        if ($stop_words === null) {
            $stop_words = array_flip(array(
                'the', 'be', 'to', 'of', 'and', 'a', 'in', 'that', 'have', 'i',
                'it', 'for', 'not', 'on', 'with', 'he', 'as', 'you', 'do', 'at',
                'this', 'but', 'his', 'by', 'from', 'they', 'we', 'say', 'her', 'she',
                'or', 'an', 'will', 'my', 'one', 'all', 'would', 'there', 'their',
                'what', 'so', 'up', 'out', 'if', 'about', 'who', 'get', 'which', 'go', 'me'
            ));
        }

        // A negative length would make array_slice() drop items from the end instead of limiting
        $max_suggestions = (int) $max_suggestions;

        if ($max_suggestions <= 0) {
            return array();
        }

        // Remove HTML tags
        $plain_content = $this->to_lower(wp_strip_all_tags((string) $content));

        if ($plain_content === '') {
            return array();
        }

        // Count word frequency, ignoring short words and stop words
        $word_freq = array();

        foreach ($this->extract_words($plain_content) as $word) {
            if ($this->text_length($word) <= 3 || isset($stop_words[$word])) {
                continue;
            }

            $word_freq[$word] = isset($word_freq[$word]) ? $word_freq[$word] + 1 : 1;
        }

        // Sort by frequency
        arsort($word_freq);

        // Get top N words
        return array_slice(array_keys($word_freq), 0, $max_suggestions);
    }

    /**
     * Lowercase text, multibyte-aware when the mbstring extension is available
     *
     * @param string $text Text to convert
     * @return string Lowercased text
     */
    private function to_lower($text) {
        $text = (string) $text;

        if (function_exists('mb_strtolower')) {
            return mb_strtolower($text, 'UTF-8');
        }

        return strtolower($text);
    }

    /**
     * Count the characters of a string, multibyte-aware when possible
     *
     * @param string $text Text to measure
     * @return int Number of characters (bytes when mbstring is unavailable)
     */
    private function text_length($text) {
        if (function_exists('mb_strlen')) {
            return mb_strlen($text, 'UTF-8');
        }

        return strlen($text);
    }

    /**
     * Bring a URL into the form used for "already linked" comparisons
     *
     * Surrounding whitespace and trailing slashes are dropped so that
     * "https://example.com/page" and "https://example.com/page/" compare equal.
     *
     * @param string $url URL to normalize
     * @return string Normalized URL
     */
    private function normalize_url($url) {
        return rtrim(trim((string) $url), '/');
    }

    /**
     * Split text into words
     *
     * A word is a run of Unicode letters that may contain single inner apostrophes
     * or hyphens ("don't", "well-known"). Surrounding quotes are not part of the word.
     *
     * @param string $text Plain text
     * @return array List of words in order of appearance
     */
    private function extract_words($text) {
        $found = preg_match_all('/\p{L}+(?:[\'-]\p{L}+)*/u', $text, $words);

        // false means the text is not valid UTF-8 (or PCRE failed); use the byte-based splitter instead
        if ($found === false) {
            return str_word_count($text, 1);
        }

        return $words[0];
    }
}
