# -*- coding: utf-8 -*-

# --------------------------------------------------------------------------
# Copyright Commvault Systems, Inc.
# See LICENSE.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""helper class for data source crawl job

    CrawlJobHelper:

        __init__()                      --      initialize the CrawlJobHelper class

        monitor_crawl_job()             --      starts a crawl job and monitors it till the end
                                                for a given data source name

        get_data_source_stats()         --      returns the data source solr core stats

        get_crawl_docs_count()          --      returns the number of documents crawled

        get_docs_count()                --      returns the count of documents that can
                                                be crawled in given folder path on given machine

        validate_crawl_files_count()    --      Validates whether crawled documents count is
                                                same as the actual documents count
                                                present on the data source

"""

from past.builtins import basestring
from AutomationUtils.machine import Machine
from Server.JobManager.jobmanager_helper import JobManager
from dynamicindex.utils.constants import FILE_DS_PREFIX
from dynamicindex.Datacube.dcube_solr_helper import SolrHelper


class CrawlJobHelper:
    """Helper class for data source crawl job monitoring"""

    def __init__(self, tc_object):
        """Initialize the class with testcase object

            Args:

                tc_object   (object)    --  testcase object; its ``commcell`` and
                ``log`` attributes are kept on the helper and the object itself
                is handed to SolrHelper
        """
        self.commcell = tc_object.commcell
        self.log = tc_object.log
        self.solr_helper = SolrHelper(tc_object)

    def monitor_crawl_job(self, data_source_name):
        """Method to start and monitor the crawl job on a data source

            Args:

                data_source_name    (str)   --  data source name on which
                crawl job will start and be monitored

            Returns:
                None

            Raises:
                Exception:

                    if the start job call returns no job id.
                    if crawl job failed.
                    if data source not found (presumably raised by the
                    datasources lookup -- confirm against the SDK).
        """
        data_source_obj = self.commcell.datacube.datasources.get(
            data_source_name)
        self.log.info("Starting crawl job on datasource : %s",
                      data_source_name)
        job_id = data_source_obj.start_job()
        if job_id is None:
            raise Exception("Something went wrong with datasource start job")
        self.log.info("Job started with id %s", job_id)
        job_manager = JobManager(_job=job_id, commcell=self.commcell)
        # NOTE(review): the positional arguments look like retry interval,
        # time limit and a hard-check flag -- confirm against JobManager.
        if job_manager.wait_for_state('completed', 10, 60, True):
            self.log.info("Crawl job completed successfully")
            return
        self.log.error("Crawl job failed.")
        raise Exception("Crawl job failed.")

    def get_data_source_stats(self, data_source_name, client_name):
        """Returns the data source solr core stats

            Args:

                data_source_name    (str)   --  data source name
                client_name         (str)   --  node name of the index server

            Returns:

                core_stats          (dict)  --  details of the data source
                solr statistics
        """
        data_source_obj = self.commcell.datacube.datasources.get(
            data_source_name)
        dcube_id = data_source_obj.datasource_id
        core_id = self.solr_helper.get_coreid_datasource(dcube_id)
        # NOTE(review): meaning of the second argument (1) is defined by
        # SolrHelper.get_solr_baseurl -- confirm there.
        base_url = self.solr_helper.get_solr_baseurl(client_name, 1)
        # Core name is built as <FILE_DS_PREFIX><data source name>_<core id>
        data_source_core_name = "{0}{1}_{2}".format(
            FILE_DS_PREFIX,
            data_source_name,
            core_id
        )
        return self.solr_helper.get_corestats(
            baseurl=base_url, corename=data_source_core_name
        )

    def get_docs_count(self, folder_path,
                       machine_name,
                       username=None, password=None,
                       include_folders=True):
        """Returns the count of files and folders in given folder path

            Args:

                folder_path     (str)   --  network or local path
                machine_name    (str)   --  if commcell client
                                                then client name
                                            else ip address or machine name
                                            for the machine containing the folder
                username        (str)   --  username for machine
                password        (str)   --  corresponding password for user
                include_folders (bool)  --  true to count files and folders in the given folder path
                                            false to count only the files in the given folder path

            Returns:

                count           (int)   --  number of documents present in the folder path

        """
        machine_obj = Machine(
            machine_name=machine_name,
            commcell_object=self.commcell,
            username=username,
            password=password
        )
        count = len(machine_obj.get_files_in_path(folder_path))
        if include_folders:
            count += len(machine_obj.get_folders_in_path(folder_path))
        return count

    def get_crawl_docs_count(self, data_source_name, client_name):
        """Returns the count of documents crawled from the data source

            Args:

                data_source_name    (str)   --  data source name
                client_name         (str)   --  node name of the index server

            Returns:

                count       (int)   --  total number of documents crawled from
                the data source; 0 when the core stats carry no 'index'
                section or no 'numDocs' entry
        """
        core_stats = self.get_data_source_stats(
            data_source_name=data_source_name, client_name=client_name)
        # Fall back to an empty dict (not 0) so the chained lookup cannot fail
        # with AttributeError when the 'index' section is missing or None.
        index_stats = core_stats.get('index') or {}
        return index_stats.get('numDocs', 0)

    def validate_crawl_files_count(self, data_source_name, include_directories_path,
                                   access_node_name, index_server_name):
        """Validates whether crawled documents count is same as the actual documents count
        present on the data source

        Args:
            data_source_name            (str)   -   file data source name

            include_directories_path    (str)   -   (,) separated directory paths
                                                    of the file data source

            access_node_name            (str)   -   access node client name of given
                                                    file data source

            index_server_name           (str)   -   index server name which is assigned
                                                    to the given file data source

        Returns:
            None

        Raises:
            Exception:

                if input data is not valid

                if number of documents crawled is not same as the actual count

        """
        inputs = (data_source_name, include_directories_path,
                  access_node_name, index_server_name)
        if not all(isinstance(value, basestring) for value in inputs):
            raise Exception("Input data is not of valid datatype")
        # Trim whitespace around each entry and drop empty ones so inputs such
        # as "C:\\a, C:\\b" or a trailing comma do not yield bogus paths.
        crawl_dir_paths = [
            dir_path.strip()
            for dir_path in include_directories_path.split(',')
            if dir_path.strip()
        ]
        total_files_count = 0
        for dir_path in crawl_dir_paths:
            total_files_count += self.get_docs_count(
                folder_path=dir_path,
                machine_name=access_node_name,
                include_folders=True
            )
        self.log.info("Number of files present in crawl directories : %s", total_files_count)
        # client_name of the index server object is indexed here, so the first
        # entry is used as the node to query.
        index_server_node = self.commcell.index_servers.get(
            index_server_name).client_name[0]
        crawled_files_count = self.get_crawl_docs_count(
            data_source_name=data_source_name,
            client_name=index_server_node
        )
        self.log.info(
            "Number of documents crawled : %s",
            crawled_files_count)
        if int(crawled_files_count) != int(total_files_count):
            self.log.error(
                "Number of crawled documents are invalid\nExpected: %s\tActual: %s",
                total_files_count,
                crawled_files_count)
            raise Exception("Number of documents crawled were incorrect")
        self.log.info("All the documents were crawled successfully")