<?php
/**
 * Knowledge Base Migrator
 *
 * Handles migration of knowledge base entries between databases and embedding models
 *
 * @package MxChat_Migration_Tool
 */

// Abort immediately when this file is requested directly instead of being
// loaded by WordPress (which defines ABSPATH during bootstrap).
defined('ABSPATH') || exit;

/**
 * Migrates knowledge base entries between storage backends and embedding models.
 *
 * Supported backends are the WordPress table `{prefix}mxchat_system_prompt_content`
 * and a Pinecone index. For every source item a fresh embedding is generated with
 * the target model and the item is written to the target backend.
 *
 * Pinecone requests never send a `namespace`, so they operate on the index's
 * default namespace.
 */
class MxChat_Knowledge_Migrator {

    /**
     * Dimension used for the dummy query vector when the index dimension
     * cannot be read from Pinecone.
     */
    const DEFAULT_PINECONE_DIMENSION = 1536;

    /**
     * topK sent with the "fetch everything" query; results beyond this are not returned.
     */
    const PINECONE_MAX_TOP_K = 10000;

    /**
     * Maximum number of bytes of content stored in the Pinecone `text` metadata field.
     */
    const PINECONE_METADATA_TEXT_BYTES = 40000;

    /**
     * Source database type ('wordpress' or 'pinecone')
     */
    private $source_database;

    /**
     * Target database type ('wordpress' or 'pinecone')
     */
    private $target_database;

    /**
     * Source embedding model
     */
    private $source_model;

    /**
     * Target embedding model
     */
    private $target_model;

    /**
     * API key for embedding generation
     */
    private $api_key;

    /**
     * Pinecone configuration for the TARGET index (keys: 'api_key', 'host')
     */
    private $pinecone_config = array();

    /**
     * Migration ID
     */
    private $migration_id;

    /**
     * Per-instance cache of the items fetched from the source Pinecone index.
     * Null until the first successful fetch.
     */
    private $pinecone_vectors_cache = null;

    /**
     * Constructor
     *
     * Missing keys are stored as null instead of raising undefined-index warnings.
     *
     * @param array $config Keys: source_database, target_database, source_model,
     *                      target_model, api_key, migration_id and optionally
     *                      pinecone_config (array with 'api_key' and 'host').
     */
    public function __construct($config) {
        $config = is_array($config) ? $config : array();

        $this->source_database = isset($config['source_database']) ? $config['source_database'] : null;
        $this->target_database = isset($config['target_database']) ? $config['target_database'] : null;
        $this->source_model = isset($config['source_model']) ? $config['source_model'] : null;
        $this->target_model = isset($config['target_model']) ? $config['target_model'] : null;
        $this->api_key = isset($config['api_key']) ? $config['api_key'] : null;
        $this->migration_id = isset($config['migration_id']) ? $config['migration_id'] : null;

        if (isset($config['pinecone_config']) && is_array($config['pinecone_config'])) {
            $this->pinecone_config = $config['pinecone_config'];
        }
    }

    /**
     * Get total number of knowledge base entries in the source backend
     *
     * @return int Row count for WordPress, number of fetched vectors for Pinecone
     *             (capped by the query's topK), 0 for an unknown source.
     */
    public function get_total_items() {
        global $wpdb;

        if ($this->source_database === 'wordpress') {
            return (int) $wpdb->get_var("SELECT COUNT(*) FROM {$wpdb->prefix}mxchat_system_prompt_content");
        } elseif ($this->source_database === 'pinecone') {
            // Pinecone is counted by fetching the vectors; the result is an
            // approximation because the query returns at most topK matches.
            return count($this->fetch_all_pinecone_vectors());
        }

        return 0;
    }

    /**
     * Migrate knowledge base entries in batches
     *
     * Each item is re-embedded with the target model and stored in the target
     * backend. A failure on one item is recorded and does not stop the batch.
     *
     * @param int $offset Zero-based position of the first item to process.
     * @param int $limit  Maximum number of items to process.
     * @return array Keys: success (always true), processed, failed, errors, message.
     */
    public function migrate_batch($offset, $limit) {
        $items = $this->fetch_source_items($offset, $limit);

        if (empty($items)) {
            return array(
                'success' => true,
                'processed' => 0,
                'failed' => 0,
                // Same shape as the regular result so callers can always read 'errors'.
                'errors' => array(),
                'message' => 'No more items to process'
            );
        }

        $processed = 0;
        $failed = 0;
        $errors = array();

        foreach ($items as $item) {
            try {
                // Generate new embedding with target model
                $new_embedding = $this->generate_embedding($item['content'], $this->target_model, $this->api_key);

                if (!$new_embedding) {
                    throw new Exception('Failed to generate embedding for item: ' . $item['id']);
                }

                // Store in target database
                if ($this->store_item($item, $new_embedding)) {
                    $processed++;
                } else {
                    $failed++;
                    $errors[] = 'Failed to store item: ' . $item['id'];
                }

            } catch (Exception $e) {
                $failed++;
                $errors[] = $e->getMessage();
                error_log('MxChat Migration Error: ' . $e->getMessage());
            }
        }

        return array(
            'success' => true,
            'processed' => $processed,
            'failed' => $failed,
            'errors' => $errors,
            'message' => sprintf('Processed %d items, %d failed', $processed, $failed)
        );
    }

    /**
     * Fetch one page of items from the source database
     *
     * @param int $offset Zero-based position of the first item.
     * @param int $limit  Maximum number of items.
     * @return array List of items with keys id, content, url, source_url,
     *               role_restriction, timestamp.
     */
    private function fetch_source_items($offset, $limit) {
        global $wpdb;

        $offset = (int) $offset;
        $limit = (int) $limit;

        if ($this->source_database === 'wordpress') {
            $results = $wpdb->get_results(
                $wpdb->prepare(
                    "SELECT * FROM {$wpdb->prefix}mxchat_system_prompt_content ORDER BY id LIMIT %d OFFSET %d",
                    $limit,
                    $offset
                ),
                ARRAY_A
            );

            $items = array();

            // A failed query does not yield an array; treat it as "no rows".
            if (!is_array($results)) {
                return $items;
            }

            foreach ($results as $row) {
                $items[] = array(
                    'id' => $row['id'],
                    'content' => $row['article_content'],
                    'url' => $row['url'],
                    'source_url' => $row['source_url'],
                    'role_restriction' => $row['role_restriction'],
                    'timestamp' => $row['timestamp']
                );
            }

            return $items;

        } elseif ($this->source_database === 'pinecone') {
            // Pinecone has no offset paging here: page through the full fetched list.
            return array_slice($this->fetch_all_pinecone_vectors(), $offset, $limit);
        }

        return array();
    }

    /**
     * Fetch all vectors from the source Pinecone index
     *
     * Connection details come from the `mxchat_pinecone_addon_options` option.
     * The index is queried with an all-zero dummy vector and a topK of
     * PINECONE_MAX_TOP_K, so larger indexes are truncated. Role restrictions are
     * merged in from the local `{prefix}mxchat_pinecone_roles` table.
     *
     * A successful result is cached on the instance and sorted by vector id so
     * that offset-based paging sees a stable order.
     *
     * @return array List of items (same shape as fetch_source_items); empty on
     *               missing configuration or request failure.
     */
    private function fetch_all_pinecone_vectors() {
        if (is_array($this->pinecone_vectors_cache)) {
            return $this->pinecone_vectors_cache;
        }

        // Get current Pinecone configuration
        $pinecone_options = get_option('mxchat_pinecone_addon_options', array());

        if (empty($pinecone_options['mxchat_pinecone_api_key']) || empty($pinecone_options['mxchat_pinecone_host'])) {
            return array();
        }

        $api_key = $pinecone_options['mxchat_pinecone_api_key'];
        $host = $pinecone_options['mxchat_pinecone_host'];

        $role_map = $this->load_pinecone_role_map();

        // The dummy vector must have the index's dimension, otherwise the query is rejected.
        $body = array(
            'topK' => self::PINECONE_MAX_TOP_K,
            'includeMetadata' => true,
            'includeValues' => false,
            'vector' => array_fill(0, $this->get_pinecone_dimension($host, $api_key), 0)
        );

        $response = wp_remote_post($this->build_pinecone_url($host, 'query'), array(
            'headers' => $this->pinecone_headers($api_key),
            'body' => wp_json_encode($body),
            'timeout' => 60
        ));

        if (is_wp_error($response)) {
            error_log('Pinecone fetch error: ' . $response->get_error_message());
            return array();
        }

        $data = json_decode(wp_remote_retrieve_body($response), true);

        if (!is_array($data) || !isset($data['matches']) || !is_array($data['matches'])) {
            error_log('Pinecone fetch failed with code: ' . wp_remote_retrieve_response_code($response));
            return array();
        }

        $items = array();
        foreach ($data['matches'] as $match) {
            if (!isset($match['id'])) {
                continue;
            }

            $vector_id = $match['id'];
            $metadata = (isset($match['metadata']) && is_array($match['metadata'])) ? $match['metadata'] : array();

            $role_info = isset($role_map[$vector_id]) ? $role_map[$vector_id] : array(
                'source_url' => '',
                'role_restriction' => 'public'
            );

            // Prioritize source_url from metadata, fallback to role_map
            $source_url = !empty($metadata['source_url'])
                ? $metadata['source_url']
                : $role_info['source_url'];

            $items[] = array(
                'id' => $vector_id,
                'content' => isset($metadata['text']) ? $metadata['text'] : '',
                'url' => isset($metadata['url']) ? $metadata['url'] : '',
                'source_url' => $source_url,
                'role_restriction' => $role_info['role_restriction'],
                'timestamp' => isset($metadata['timestamp']) ? $metadata['timestamp'] : current_time('mysql')
            );
        }

        if (count($items) >= self::PINECONE_MAX_TOP_K) {
            error_log('MxChat Migration: Pinecone returned the maximum of ' . self::PINECONE_MAX_TOP_K . ' vectors; the index may contain more that were not fetched');
        }

        // Match order is not specified for a dummy query; sort for stable paging.
        usort($items, function ($a, $b) {
            return strcmp((string) $a['id'], (string) $b['id']);
        });

        $this->pinecone_vectors_cache = $items;

        return $items;
    }

    /**
     * Load the vector_id => role information map from the local roles table
     *
     * @return array Map of vector id to array('source_url', 'role_restriction').
     */
    private function load_pinecone_role_map() {
        global $wpdb;

        $rows = $wpdb->get_results(
            "SELECT vector_id, source_url, role_restriction FROM {$wpdb->prefix}mxchat_pinecone_roles",
            ARRAY_A
        );

        $role_map = array();

        if (!is_array($rows)) {
            return $role_map;
        }

        foreach ($rows as $row) {
            $role_map[$row['vector_id']] = array(
                'source_url' => $row['source_url'],
                'role_restriction' => $row['role_restriction']
            );
        }

        return $role_map;
    }

    /**
     * Read the vector dimension of a Pinecone index
     *
     * Calls the index's describe_index_stats endpoint and falls back to
     * DEFAULT_PINECONE_DIMENSION when the dimension cannot be determined.
     *
     * @param string $host    Index host, with or without scheme.
     * @param string $api_key Pinecone API key.
     * @return int Positive vector dimension.
     */
    private function get_pinecone_dimension($host, $api_key) {
        $response = wp_remote_post($this->build_pinecone_url($host, 'describe_index_stats'), array(
            'headers' => $this->pinecone_headers($api_key),
            // Literal empty JSON object; wp_json_encode(array()) would produce "[]".
            'body' => '{}',
            'timeout' => 30
        ));

        if (!is_wp_error($response)) {
            $stats = json_decode(wp_remote_retrieve_body($response), true);

            if (is_array($stats) && isset($stats['dimension']) && (int) $stats['dimension'] > 0) {
                return (int) $stats['dimension'];
            }
        }

        return self::DEFAULT_PINECONE_DIMENSION;
    }

    /**
     * Build an https URL for a Pinecone index endpoint
     *
     * @param string $host Index host, with or without an http(s):// prefix.
     * @param string $path Endpoint path, e.g. 'query' or 'vectors/upsert'.
     * @return string
     */
    private function build_pinecone_url($host, $path) {
        $host = str_replace(array('https://', 'http://'), '', (string) $host);

        return 'https://' . rtrim($host, '/') . '/' . ltrim($path, '/');
    }

    /**
     * Request headers shared by all Pinecone calls
     *
     * @param string $api_key Pinecone API key.
     * @return array
     */
    private function pinecone_headers($api_key) {
        return array(
            'Api-Key' => $api_key,
            'Content-Type' => 'application/json'
        );
    }

    /**
     * Cut a string to at most $max_bytes bytes without splitting a UTF-8 character
     *
     * Falls back to a plain byte cut when the mbstring extension is unavailable.
     *
     * @param string $text
     * @param int    $max_bytes
     * @return string
     */
    private function truncate_utf8_bytes($text, $max_bytes) {
        $text = (string) $text;

        if (strlen($text) <= $max_bytes) {
            return $text;
        }

        if (function_exists('mb_strcut')) {
            return mb_strcut($text, 0, $max_bytes, 'UTF-8');
        }

        return substr($text, 0, $max_bytes);
    }

    /**
     * Generate embedding for content
     *
     * Delegates to MxChat_Admin::mxchat_generate_embedding(). The `mxchat_options`
     * option is temporarily rewritten with the requested model and API key and is
     * restored to its exact original value afterwards, even if generation throws.
     *
     * @param string $content Text to embed.
     * @param string $model   Embedding model; a 'voyage' or 'gemini-embedding'
     *                        prefix selects which API key option is overridden.
     * @param string $api_key API key for the selected provider.
     * @return array|false The embedding, or false when the core classes are
     *                     missing or generation did not return an array.
     */
    private function generate_embedding($content, $model, $api_key) {
        // Use the core plugin's embedding generation method from MxChat_Admin
        if (!class_exists('MxChat_Admin') || !class_exists('MxChat_Knowledge_Manager')) {
            return false;
        }

        $knowledge_manager = MxChat_Knowledge_Manager::get_instance();
        $admin = new MxChat_Admin($knowledge_manager);

        $original_options = get_option('mxchat_options', array());
        if (!is_array($original_options)) {
            $original_options = array();
        }

        // Temporarily set the embedding model and the matching API key
        $model_name = (string) $model;
        $temporary_options = $original_options;
        $temporary_options['embedding_model'] = $model;

        if (strpos($model_name, 'voyage') === 0) {
            $temporary_options['voyage_api_key'] = $api_key;
        } elseif (strpos($model_name, 'gemini-embedding') === 0) {
            $temporary_options['gemini_api_key'] = $api_key;
        } else {
            $temporary_options['api_key'] = $api_key;
        }

        update_option('mxchat_options', $temporary_options);

        try {
            $embedding = $admin->mxchat_generate_embedding($content);
        } finally {
            // Always restore, so a failure cannot leave the migration key/model persisted.
            update_option('mxchat_options', $original_options);
        }

        // Check if embedding generation was successful
        if (is_array($embedding)) {
            return $embedding;
        }

        error_log('MxChat Migration: Embedding generation failed: ' . print_r($embedding, true));
        return false;
    }

    /**
     * Store item in target database
     *
     * WordPress -> WordPress updates the existing row's embedding in place;
     * any other source inserts a new WordPress row. A Pinecone target is
     * written through store_in_pinecone().
     *
     * @param array $item      Item as produced by fetch_source_items().
     * @param array $embedding Embedding generated with the target model.
     * @return bool True when the item was stored.
     */
    private function store_item($item, $embedding) {
        global $wpdb;

        if ($this->target_database === 'wordpress') {
            $table = $wpdb->prefix . 'mxchat_system_prompt_content';

            // Check if migrating within same database (just changing model)
            if ($this->source_database === 'wordpress' && isset($item['id'])) {
                // Update existing entry with new embedding
                $result = $wpdb->update(
                    $table,
                    array(
                        'embedding_vector' => serialize($embedding)
                    ),
                    array('id' => $item['id']),
                    array('%s'),
                    array('%d')
                );

                if ($result === false) {
                    error_log(sprintf(
                        'MIGRATION: Failed to update WordPress entry ID %d: %s',
                        $item['id'],
                        $wpdb->last_error
                    ));

                    return false;
                }

                error_log(sprintf(
                    'MIGRATION: Updated WordPress entry ID %d with new embedding',
                    $item['id']
                ));

                return true;
            }

            // Migrating from different database (Pinecone → WordPress): insert new entry
            $result = $wpdb->insert(
                $table,
                array(
                    'url' => isset($item['url']) ? $item['url'] : '',
                    'article_content' => isset($item['content']) ? $item['content'] : '',
                    'embedding_vector' => serialize($embedding),
                    'source_url' => isset($item['source_url']) ? $item['source_url'] : '',
                    'role_restriction' => isset($item['role_restriction']) ? $item['role_restriction'] : '',
                    'timestamp' => current_time('mysql')
                ),
                array('%s', '%s', '%s', '%s', '%s', '%s')
            );

            return $result !== false;

        } elseif ($this->target_database === 'pinecone') {
            return $this->store_in_pinecone($item, $embedding);
        }

        return false;
    }

    /**
     * Store item in the target Pinecone index
     *
     * Uses the 'api_key' and 'host' from the pinecone_config passed to the
     * constructor. When the source is Pinecone the original vector id is reused,
     * so the upsert replaces the existing vector instead of adding a duplicate;
     * otherwise a new 'kb_' id is generated. The role restriction is recorded in
     * the local `{prefix}mxchat_pinecone_roles` table.
     *
     * @param array $item      Item as produced by fetch_source_items().
     * @param array $embedding Embedding generated with the target model.
     * @return bool True when Pinecone accepted the upsert with HTTP 200.
     */
    private function store_in_pinecone($item, $embedding) {
        $api_key = isset($this->pinecone_config['api_key']) ? $this->pinecone_config['api_key'] : '';
        $host = isset($this->pinecone_config['host']) ? $this->pinecone_config['host'] : '';

        if (empty($api_key) || empty($host)) {
            return false;
        }

        $content = isset($item['content']) ? (string) $item['content'] : '';
        $item_url = isset($item['url']) ? $item['url'] : '';
        $source_url = isset($item['source_url']) ? $item['source_url'] : '';
        $role_restriction = isset($item['role_restriction']) ? $item['role_restriction'] : '';

        if ($this->source_database === 'pinecone' && isset($item['id']) && $item['id'] !== '') {
            // Keep the source id so the upsert overwrites the old-embedding vector.
            $vector_id = (string) $item['id'];
        } else {
            // Generate a unique vector ID
            $vector_id = 'kb_' . md5($item_url . $content . time());
        }

        $body = array(
            'vectors' => array(
                array(
                    'id' => $vector_id,
                    'values' => $embedding,
                    'metadata' => array(
                        // Pinecone metadata limit; cut on a character boundary
                        'text' => $this->truncate_utf8_bytes($content, self::PINECONE_METADATA_TEXT_BYTES),
                        'url' => $item_url,
                        'source_url' => $source_url,
                        'timestamp' => current_time('mysql')
                    )
                )
            )
        );

        $response = wp_remote_post($this->build_pinecone_url($host, 'vectors/upsert'), array(
            'headers' => $this->pinecone_headers($api_key),
            'body' => wp_json_encode($body),
            'timeout' => 30
        ));

        if (is_wp_error($response)) {
            error_log('Pinecone upsert error: ' . $response->get_error_message());
            return false;
        }

        $response_code = wp_remote_retrieve_response_code($response);

        if ((int) $response_code !== 200) {
            error_log('Pinecone upsert failed with code: ' . $response_code);
            return false;
        }

        // Store role restriction in local database
        global $wpdb;
        $role_saved = $wpdb->replace(
            $wpdb->prefix . 'mxchat_pinecone_roles',
            array(
                'vector_id' => $vector_id,
                'source_url' => $source_url,
                'role_restriction' => $role_restriction,
                'updated_at' => current_time('mysql')
            ),
            array('%s', '%s', '%s', '%s')
        );

        if ($role_saved === false) {
            // The vector is already stored; surface the lost role mapping in the log.
            error_log('MxChat Migration: Failed to save role restriction for vector ' . $vector_id . ': ' . $wpdb->last_error);
        }

        return true;
    }

    /**
     * Clean up source data (optional - only if user wants to delete old data)
     *
     * Refuses to run when source and target are the same store, because that
     * would delete the data that was just migrated in place.
     *
     * @return bool True when the source was cleared (or there was nothing to
     *              clear for an unknown source); false when cleanup was refused
     *              or the delete failed.
     */
    public function cleanup_source_data() {
        global $wpdb;

        if ($this->source_database === 'wordpress') {
            if ($this->target_database === 'wordpress') {
                // Same table was updated in place; truncating would wipe the migrated rows.
                error_log('MxChat Migration: Cleanup skipped - source and target are the same WordPress table');
                return false;
            }

            // Delete all entries from WordPress database
            return $wpdb->query("TRUNCATE TABLE {$wpdb->prefix}mxchat_system_prompt_content") !== false;
        }

        if ($this->source_database === 'pinecone') {
            // Delete all vectors from the source Pinecone index
            return $this->delete_pinecone_namespace();
        }

        return true;
    }

    /**
     * Delete Pinecone default namespace (all vectors) of the source index
     *
     * Connection details come from the `mxchat_pinecone_addon_options` option.
     * Nothing is deleted when the target is the same Pinecone host.
     *
     * @return bool True when Pinecone answered the delete with a 2xx status.
     */
    private function delete_pinecone_namespace() {
        $pinecone_options = get_option('mxchat_pinecone_addon_options', array());

        if (empty($pinecone_options['mxchat_pinecone_api_key']) || empty($pinecone_options['mxchat_pinecone_host'])) {
            return false;
        }

        $api_key = $pinecone_options['mxchat_pinecone_api_key'];
        $host = $pinecone_options['mxchat_pinecone_host'];

        if ($this->is_target_pinecone_host($host)) {
            error_log('MxChat Migration: Cleanup skipped - source and target are the same Pinecone index');
            return false;
        }

        $response = wp_remote_post($this->build_pinecone_url($host, 'vectors/delete'), array(
            'headers' => $this->pinecone_headers($api_key),
            'body' => wp_json_encode(array('deleteAll' => true)),
            'timeout' => 30
        ));

        if (is_wp_error($response)) {
            error_log('Pinecone delete error: ' . $response->get_error_message());
            return false;
        }

        $response_code = (int) wp_remote_retrieve_response_code($response);

        if ($response_code < 200 || $response_code >= 300) {
            error_log('Pinecone delete failed with code: ' . $response_code);
            return false;
        }

        return true;
    }

    /**
     * Whether the given host is the Pinecone index configured as migration target
     *
     * @param string $host Source index host, with or without scheme.
     * @return bool
     */
    private function is_target_pinecone_host($host) {
        if ($this->target_database !== 'pinecone' || empty($this->pinecone_config['host'])) {
            return false;
        }

        $source = strtolower($this->build_pinecone_url($host, ''));
        $target = strtolower($this->build_pinecone_url($this->pinecone_config['host'], ''));

        return $source === $target;
    }
}
