"""
Created on Tue Dec 04 8:38:11 2015

@author: smujjiga@commvault.com
"""
# URL path prefix for the actions dispatched by HandleRequests() ("/ee/<action>").
handleKey = "ee"

import csv
import logging
import os
import re
import sys
import threading
import time
import urllib.parse
from collections import Counter
from pathlib import Path
from datetime import datetime
from logging.handlers import RotatingFileHandler
from multiprocessing import Process, Queue, Value
from threading import Event, Thread

import psutil

import ujson
from CvCAGenericLogger import get_logger_handler
from CvEEClient import main_client
from CvEEConfigHelper import *
from CvEEJobPublisher import main_publisher
from CvEEMsgQueueHandler import GenericMsgQueueCommunicator

try:
    from winreg import CreateKey, OpenKey
    import win32serviceutil as svc
except:
    pass

# Shared integer flag (initially 1); startEE() sets it to 0 when loading the
# entities fails, and it is handed to the generic client processes.
COMMSERVER_REACHABLE = Value("i", 1)
# Command queues: cmdQP for the publisher, cmdQC indexed per client,
# cmdQG indexed per generic client (filled by spawn_client()).
cmdQP = None
cmdQC = None
cmdQG = None
# Process handles; the client/generic ones are lists indexed like their queues.
eePublisherProcess = None
eeClientProcess = None
eeGenericProcess = None
# shared_tasks list of each generic client, by index, so a dead client can be
# respawned with the same tasks (see checkAndRestartGenericClients()).
process_task_index_mapping = None
pids = ""

# Identity of the handler process/thread captured at import time.
PROCESS_ID = os.getpid()
THREAD_ID = threading.current_thread().ident

# Defaults for the process counts; loadRegistryValues() overrides them.
NO_PROCESSES = 5
NO_GENERIC_PROCESSES = 10
NO_SP_NER_PROCESSES = 4
NUM_DOC_TAGGER_PROCESSES = 0
IS_DUMMY_NER_PROCESS = False
IS_REGISTRY_NER_PROCESSES = True

# Intervals in seconds (they are passed to threading.Timer where used below).
PROCESS_RESTART_TIME = 20 * 60  # 20 Mins
PUBLISHER_SLEEP_TIMER = 30 * 60  # 30 Mins
CLIENTS_CHECK_TIMER = 3 * 60  # 3 Mins
GENERIC_CLIENTS_CHECK_TIMER = 1 * 60  # 1 Mins
CHECK_COMMSERVER_CONNECTIVITY = 10 * 60  # 10 Mins

# PROFILER holds the ProfilerReader thread once startEE() creates it;
# LOGGER_Handler is set by initLogging(); ENTITIES_ATTRIBUTES by startEE().
PROFILER = None
LOGGER_Handler = None
ENTITIES_ATTRIBUTES = {}


def get_regkey_path():
    """Return an open registry key for this instance's ContentAnalyzer settings.

    The key is looked up under HKEY_LOCAL_MACHINE at
    ``SOFTWARE\\Commvault Systems\\Galaxy\\<instance>\\ContentAnalyzer`` and is
    created when it cannot be opened.

    Returns:
        The key handle returned by ``OpenKey`` or, as a fallback, ``CreateKey``.
    """
    instance_name = getInstanceName()
    registry = ConnectRegistry(None, HKEY_LOCAL_MACHINE)
    reg_path = "SOFTWARE\\Commvault Systems\\Galaxy\\{0}\\ContentAnalyzer".format(instance_name)
    try:
        return OpenKey(registry, reg_path, 0, KEY_ALL_ACCESS)
    except OSError:
        # Registry failures are reported as OSError; anything else (e.g. a
        # KeyboardInterrupt) must not be mistaken for "key does not exist".
        return CreateKey(registry, reg_path)


def loadRegistryValues():
    """Reload REG_CONF and the process-count / restart-time globals.

    On non-Linux hosts the values are read through the registry key returned
    by get_regkey_path(); on Linux the literal "ContentAnalyzer" is passed to
    loadSpecificValue() instead. Any failure is printed and otherwise ignored,
    leaving the globals that were not yet updated at their previous values.
    """
    global REG_CONF, LOG_LEVELS, NO_PROCESSES, PROCESS_RESTART_TIME, NO_GENERIC_PROCESSES, NO_SP_NER_PROCESSES, IS_DUMMY_NER_PROCESS, IS_REGISTRY_NER_PROCESSES, NUM_DOC_TAGGER_PROCESSES

    try:
        reg_root = get_regkey_path() if is_linux() == False else "ContentAnalyzer"

        for name, entry in list(REG_CONF.items()):
            lookup_args = [reg_root, entry["regKey"], entry["value"], entry["type"]]
            # keyType is optional and only forwarded when the entry declares it
            if "keyType" in entry:
                lookup_args.append(entry["keyType"])
            REG_CONF[name]["value"] = loadSpecificValue(*lookup_args)

        REG_CONF["stompPort"]["value"] = getStompPort()

        NO_PROCESSES = loadSpecificValue(reg_root, "sEENumClients", NO_PROCESSES, int)
        NO_GENERIC_PROCESSES = loadSpecificValue(
            reg_root, "sEENumGenericClients", NO_GENERIC_PROCESSES, int
        )
        NUM_DOC_TAGGER_PROCESSES = loadSpecificValue(reg_root, "bNumDocTaggerProcess", 1, int)

        # If the sEENumSpNerClients regkey is not set (-1), the number of NER
        # processes is derived from the available system resources instead.
        NO_SP_NER_PROCESSES = loadSpecificValue(reg_root, "sEENumSpNerClients", -1, int)
        if NO_SP_NER_PROCESSES == -1:
            IS_REGISTRY_NER_PROCESSES = False
            NO_SP_NER_PROCESSES, IS_DUMMY_NER_PROCESS = calculate_possible_ner_processes()

        # The registry value is expressed in minutes, the global in seconds.
        restart_minutes = loadSpecificValue(
            reg_root, "sEEClientRestartTime", PROCESS_RESTART_TIME / 60, int
        )
        PROCESS_RESTART_TIME = restart_minutes * 60

    except Exception as e:
        print("Error while accessing registry from Handler: {}".format(e))


def initLogging():
    """Create the module-wide LOGGER_Handler and log the effective configuration.

    Logs every REG_CONF value, the client counts, how the Named Entity
    Recognition process count was chosen (only when it was not taken from the
    registry) and the client restart time. Any failure is printed and
    otherwise ignored.
    """
    global LOGGER_Handler
    module_name = "CVEEHandler"
    func_name = "initLogging"
    func_str = "{}::{}() - ".format(module_name, func_name)
    try:
        logger_options = {
            "ROTATING_BACKUP_COUNT": ROTATING_BACKUP_COUNT,
            "ROTATING_MAX_BYTES": ROTATING_MAX_BYTES,
        }

        LOGGER_Handler = get_logger_handler(
            os.path.join(getBaseDir(), get_helper_dll()), "ContentAnalyzer", logger_options
        )

        for key, val in list(REG_CONF.items()):
            LOGGER_Handler.debug(
                "Registry value for key {} is {}".format(key, val["value"]), func_str
            )

        LOGGER_Handler.info("No. of clients are %d" % NO_PROCESSES, func_str)
        LOGGER_Handler.info(
            "No. of generic clients are {} with {} dedicated for SP_NER".format(
                NO_GENERIC_PROCESSES, NO_SP_NER_PROCESSES
            ),
            func_str,
        )
        LOGGER_Handler.info("Separate Dedicated client for DOC_TAGGER", func_str)
        if IS_REGISTRY_NER_PROCESSES is False:
            # The NER count was computed from system resources; say what was decided.
            # func_str is passed here like in every other call of this function.
            if IS_DUMMY_NER_PROCESS is True:
                LOGGER_Handler.info(
                    "Dummy process spawned for Named Entity Recognition because of insufficient system resources. Available Memory {} GB, Total CPU cores {}".format(
                        get_available_memory(), get_cpu_cores()
                    ),
                    func_str,
                )
            else:
                LOGGER_Handler.info(
                    "Number of processes started for Named Entity Recognition is {}. Available Memory {} GB, Total CPU cores {}".format(
                        NO_SP_NER_PROCESSES, get_available_memory(), get_cpu_cores()
                    ),
                    func_str,
                )
        LOGGER_Handler.debug(
            "Client restart time is %d mins" % (PROCESS_RESTART_TIME / 60), func_str
        )

    except Exception as e:
        print("Error while initialising logging: {}".format(e))


class CommandHandler(GenericMsgQueueCommunicator):
    """Reads commands from the "PythonCommands" queue and drives the EE processes.

    Supported commands (the ``"command"`` key of the JSON payload) are
    ``kill``, ``start``, ``stop`` and ``wake``.
    """

    def __init__(self):
        GenericMsgQueueCommunicator.__init__(self)
        self.commandQueue = "PythonCommands"
        self.stompPort = REG_CONF["stompPort"]["value"]
        self.host = "127.0.0.1"
        # Delay in seconds between a "kill" command and the forced kill of
        # this process and its children.
        self.killTimeout = 60
        self.clients = {self.commandQueue: {"subscribe": True, "client": None}}
        self._stop = Event()
        self.prefetchSize = 8

    def stop(self):
        """Ask the run() loop to exit after the frame it is currently handling."""
        module_name = "CVEEHandler.CommandHandler"
        func_name = "stop"
        func_str = "{}::{}() - ".format(module_name, func_name)
        try:
            LOGGER_Handler.info("Stopping Command Thread.", func_str)
            self._stop.set()
        except Exception:
            LOGGER_Handler.exception("Error while stopping command thread", func_str)

    def startClients(self):
        """Start all Entity Extraction processes; errors propagate to the caller."""
        startEE()

    def stopClients(self):
        """Stop all Entity Extraction processes."""
        stopEE()

    def wakeClients(self):
        """Forward a wake request to wakeEE()."""
        wakeEE()

    def run(self):
        """Connect to the command queue, start the clients and process commands.

        The loop ends when stop() is called or a "kill" command arrives. Any
        exception while reading or handling a frame is treated as the message
        queue being down: this process and its children are killed.

        Raises:
            Exception: whatever startClients() raises during the initial start.
        """
        module_name = "CVEEHandler.CommandHandler"
        func_name = "run"
        func_str = "{}::{}() - ".format(module_name, func_name)
        # Function which will listen to the python commands queue,
        # and send messages for analysis
        self.connectToQueue(handler_pid=os.getpid())
        LOGGER_Handler.info("Started Commands Thread", func_str)
        self._stop.clear()
        try:
            self.startClients()
        except Exception:
            self._stop.set()
            raise

        while not self._stop.is_set():
            try:
                frame = self.getFrame(self.clients[self.commandQueue]["client"])
                if frame is None:
                    continue
                payload = ujson.loads(frame)
                LOGGER_Handler.debug("Received message {0} from Commands".format(payload), func_str)
                self.handleCommand(payload)
            except Exception as e:
                LOGGER_Handler.error(
                    f"Exception while reading from Commands queue. Exception {e}", func_str
                )
                self.connected = False
                # self.connectToQueue()
                LOGGER_Handler.info(
                    "Message queue is down. Killing all Content Analyzer python processes.",
                    func_str,
                )
                killProcessAndChildren(os.getpid())
                break

        self.clients[self.commandQueue]["client"].disconnect()
        self.stopClients()

    def handleCommand(self, payload):
        """Execute the command carried by ``payload``.

        Args:
            payload: decoded JSON message; expected to contain a "command" key.

        A payload without a "command" key is logged and ignored; unknown
        command values are ignored silently.
        """
        module_name = "CVEEHandler"
        func_name = "handleCommand"
        func_str = "{}::{}() - ".format(module_name, func_name)
        if "command" not in payload:
            LOGGER_Handler.error(
                "Wrong format of payload in Command Queue: {}".format(payload), func_str
            )
            # Without this return the lookup below raises KeyError, which run()
            # interprets as a dead message queue and kills every process.
            return

        command = payload["command"]
        if command == "kill":
            self._stop.set()
            # Give run() killTimeout seconds to shut down cleanly, then force it.
            threading.Timer(self.killTimeout, killProcessAndChildren, [os.getpid()]).start()
        elif command == "start":
            self.startClients()
        elif command == "stop":
            self.stopClients()
        elif command == "wake":
            self.wakeClients()


class ProfilerReader(Thread, GenericMsgQueueCommunicator):
    """Thread that reads the "PROFILING" queue and logs each message as a row.

    Every message is converted to strings, completed with default values for
    the columns it does not carry, and written as one tab-separated line to
    ``CAProfiler.log`` (rotating) in the directory returned by getLogDir().
    """

    def __init__(self):
        module_name = "CVEEHandler.ProfilerReader"
        func_name = "__init__"
        func_str = "{}::{}() - ".format(module_name, func_name)
        Thread.__init__(self)
        GenericMsgQueueCommunicator.__init__(self)
        self.profilingQueue = "PROFILING"
        self.stompPort = REG_CONF["stompPort"]["value"]
        self.host = "127.0.0.1"
        self.clients = {self.profilingQueue: {"subscribe": True, "client": None}}
        # NOTE(review): on Python versions where threading.Thread has an
        # internal _stop() method this attribute shadows it, which can break
        # join()/is_alive() once the thread has finished. Rename it after
        # confirming GenericMsgQueueCommunicator does not read self._stop.
        self._stop = Event()
        self.prefetchSize = 8
        self.file_handle = None
        self.csv_handle = None
        # os.path.join instead of a hard-coded "\\" so the file also lands
        # inside the log directory on Linux.
        self.PROFILING_FILE_NAME = os.path.join(getLogDir(), "CAProfiler.log")
        self.LOG_FORMAT = "%(asctime)s\t%(message)s"
        self.log_formatter = logging.Formatter(self.LOG_FORMAT, datefmt="%m/%d %H:%M:%S")
        self.LOGGER_Profiler = logging.getLogger(__name__ + "_Profiler")
        self.LOG_LEVEL = logging.getLevelName(LOG_LEVELS[REG_CONF["log_level"]["value"]])
        if self.LOGGER_Profiler.handlers:
            # logging.getLogger() returns the same logger for every instance;
            # adding another handler would write each row more than once.
            self.handler = self.LOGGER_Profiler.handlers[0]
        else:
            self.handler = RotatingFileHandler(
                self.PROFILING_FILE_NAME,
                maxBytes=REG_CONF["log_max_bytes"]["value"],
                backupCount=REG_CONF["log_backup_count"]["value"],
            )
            self.handler.setFormatter(self.log_formatter)
            self.LOGGER_Profiler.addHandler(self.handler)
        self.LOGGER_Profiler.setLevel(self.LOG_LEVEL)
        self.populateDefaults()
        self.AMQ_CONNECT_WAIT_TIME = 2 * 60
        LOGGER_Handler.info("Profiling logging started", func_str)
        # Column order of every row written by writeToCsv().
        self.csv_columns = [
            "processId",
            "source",
            "systemCPULoadPercentage",
            "systemMemoryUsagePercentage",
            "numberOfCPUCores",
            "hostname",
            "opTypeName",
            "activeThreads",
            "mimeType",
            "numTasks",
            "qAverageWaitTime",
            "taskAverageProcessingTime",
            "averageFileSize",
        ]

    def writeToCsv(self, row):
        """Write ``row`` as one tab-separated line (each value followed by a tab).

        Args:
            row: mapping that contains every key listed in ``self.csv_columns``.

        Errors (e.g. a missing column) are logged, not raised.
        """
        module_name = "CVEEHandler"
        func_name = "writeToCsv"
        func_str = "{}::{}() - ".format(module_name, func_name)
        try:
            log_message = "".join(str(row[key]) + "\t" for key in self.csv_columns)
            # Logged at error level so the row is emitted whatever LOG_LEVEL is
            # configured (presumably intentional — TODO confirm).
            self.LOGGER_Profiler.error(log_message)
        except Exception:
            LOGGER_Handler.exception("Error while writing to CSV", func_str)

    def populateDefaults(self):
        """Build ``self.defaults``, the values used for columns a message lacks."""
        active_clients = len(eeGenericProcess) if eeGenericProcess is not None else 0
        self.defaults = {
            "systemCPULoadPercentage": 0,
            "systemMemoryUsagePercentage": 0,
            "numberOfCPUCores": self.getNumCores(),
            "hostname": self.getHostname(),
            "processId": 0,
            "source": "Python",
            "opTypeName": "Not Defined",
            "activeThreads": active_clients,
            "mimeType": "Not Defined",
            "numTasks": 0,
            "qAverageWaitTime": 0,
            "taskAverageProcessingTime": 0,
            "averageFileSize": 0,
        }

    def getCPUUsage(self):
        """Return psutil.cpu_percent(), or None (after logging) on failure."""
        module_name = "CVEEHandler"
        func_name = "getCPUUsage"
        func_str = "{}::{}() - ".format(module_name, func_name)
        try:
            return psutil.cpu_percent()
        except Exception:
            LOGGER_Handler.exception("Error while getting CPU Usage", func_str)

    def getMemoryUsage(self):
        """Return element 2 of psutil.virtual_memory(), or None on failure."""
        module_name = "CVEEHandler"
        func_name = "getMemoryUsage"
        func_str = "{}::{}() - ".format(module_name, func_name)
        try:
            return psutil.virtual_memory()[2]
        except Exception:
            LOGGER_Handler.exception("Error while getting Memory Usage", func_str)

    def getNumCores(self):
        """Return psutil.cpu_count(logical=False), or None on failure."""
        module_name = "CVEEHandler"
        func_name = "getNumCores"
        func_str = "{}::{}() - ".format(module_name, func_name)
        try:
            return psutil.cpu_count(logical=False)
        except Exception:
            LOGGER_Handler.exception("Error while getting number of cores", func_str)

    def getHostname(self):
        """Return socket.getfqdn(), or None (after logging) on failure."""
        module_name = "CVEEHandler"
        func_name = "getHostname"
        func_str = "{}::{}() - ".format(module_name, func_name)
        try:
            import socket

            return socket.getfqdn()
        except Exception:
            LOGGER_Handler.exception("Error while getting Hostname", func_str)

    def analyzeProfileMessages(self, message):
        """Stringify ``message``, fill in missing columns and write the row.

        Args:
            message: decoded profiling payload (a mapping).
        """
        module_name = "CVEEHandler"
        func_name = "analyzeProfileMessages"
        func_str = "{}::{}() - ".format(module_name, func_name)
        try:
            result = {key: str(val) for key, val in message.items()}
            result = self.cleanMessage(result)
            self.writeToCsv(result)
        except Exception:
            LOGGER_Handler.exception("Error while analysing Profiler messages", func_str)

    def cleanMessage(self, result):
        """Add current CPU/memory usage and default values for absent keys.

        Args:
            result: mapping to complete; it is modified in place.

        Returns:
            The same ``result`` mapping.
        """
        module_name = "CVEEHandler"
        func_name = "cleanMessage"
        func_str = "{}::{}() - ".format(module_name, func_name)
        try:
            if "systemCPULoadPercentage" not in result:
                result["systemCPULoadPercentage"] = str(self.getCPUUsage())
            if "systemMemoryUsagePercentage" not in result:
                result["systemMemoryUsagePercentage"] = str(self.getMemoryUsage())

            for key, default in self.defaults.items():
                result.setdefault(key, default)

        except Exception:
            LOGGER_Handler.exception("There was an error while cleaning the messages", func_str)

        return result

    def run(self):
        """Consume the profiling queue until stop() is called.

        A failure while reading or handling a frame is logged and followed by
        a reconnect attempt.
        """
        module_name = "CVEEHandler.ProfilerReader"
        func_name = "run"
        func_str = "{}::{}() - ".format(module_name, func_name)
        self.connectToQueue()
        LOGGER_Handler.info("Started Profiler Thread", func_str)
        self._stop.clear()

        while not self._stop.is_set():
            try:
                frame = self.getFrame(self.clients[self.profilingQueue]["client"])
                if frame is None:
                    continue
                payload = ujson.loads(frame)
                self.analyzeProfileMessages(payload)
                LOGGER_Handler.debug(
                    "Received message {0} from Profiling".format(payload), func_str
                )
            except Exception:
                LOGGER_Handler.exception("Exception while reading from Profiling queue", func_str)
                self.connected = False
                self.connectToQueue()

        self.clients[self.profilingQueue]["client"].disconnect()

    def stop(self):
        """Ask the run() loop to exit after the frame it is currently handling."""
        module_name = "CVEEHandler.ProfilerReader"
        func_name = "stop"
        func_str = "{}::{}() - ".format(module_name, func_name)
        try:
            LOGGER_Handler.info("Stopping Profiling Thread.", func_str)
            self._stop.set()
        except Exception:
            LOGGER_Handler.exception("Error while stopping profiler", func_str)


def HandleRequests(urlparameters, body):
    """Dispatch a request to the Entity Extraction action matching its path.

    Args:
        urlparameters: parsed URL; only its ``path`` attribute is read here.
        body: request body, forwarded to ``ping`` only.

    Returns:
        The value returned by the selected action, or the string
        "Unknown Entity Extraction Resource Requested" for any other path.
    """
    prefix = "/" + handleKey
    # ping keeps receiving the request data as before. The lifecycle functions
    # take no parameters (see their definitions / their call in
    # CommandHandler), so passing (urlparameters, body) to them raised
    # TypeError.
    actionTable = {
        prefix + "/ping": lambda: ping(urlparameters, body),
        prefix + "/startee": lambda: startEE(),
        prefix + "/stopee": lambda: stopEE(),
        prefix + "/wakeee": lambda: wakeEE(),
        prefix + "/startgenericee": lambda: startGenericEE(),
        prefix + "/stopgenericee": lambda: stopGenericEE(),
    }

    path = urlparameters.path

    # endswith() is also safe for an empty path, where path[-1] raised IndexError.
    if path.endswith("/"):
        path = path[:-1]

    action = actionTable.get(path)
    if action is None:
        return "Unknown Entity Extraction Resource Requested"
    return action()


def mlflow_server(parent_pid):
    """Run the mlflow tracking server in the current process.

    Intended as the target of the child process created by
    start_mlflow_server(). The server listens on 127.0.0.1 at the port read
    from the "bMlflowServerPort" registry value (default 5004) and stores its
    data under the ``mlruns`` directory derived from the working directory.

    Args:
        parent_pid: PID of the handler process, passed to checkParentAndKill().

    Failures are logged, not raised.
    """
    checkParentAndKill(parent_pid, os.getpid())
    try:
        from mlflow.server.handlers import initialize_backend_stores
        from mlflow.server import _run_server

        # get port number for mlflow server
        mlflow_server_port = loadRegValue("bMlflowServerPort", 5004)

        DEFAULT_TRACKING_URI = str(Path.cwd()) + str(
            Path("\\..\\ContentAnalyzer\\bin\\classifier_models\\custom_trained_models\\mlruns")
        )
        DEFAULT_TRACKING_URI = "file://" + DEFAULT_TRACKING_URI.replace("\\", "/")
        backend_store_uri = DEFAULT_TRACKING_URI
        default_artifact_root = DEFAULT_TRACKING_URI

        initialize_backend_stores(backend_store_uri, default_artifact_root)
        host = "127.0.0.1"
        port = mlflow_server_port
        # Logged before the call: _run_server presumably blocks for as long as
        # the server runs (TODO confirm), so a message placed after it would
        # only appear once the server has exited.
        LOGGER_Handler.info(
            f"Starting mlflow server on url http://{host}:{port} in process id {os.getpid()}"
        )
        _run_server(backend_store_uri, default_artifact_root, host, port, None, 1)
    except Exception as e:
        LOGGER_Handler.exception(f"Failed to start mlflow server. Exception {e}")


def start_mlflow_server():
    """Launch mlflow_server() in a daemon child process.

    The current PID is handed to the child as its parent PID. Failures while
    creating or starting the process are logged, not raised.
    """
    try:
        child = Process(target=mlflow_server, args=(os.getpid(),))
        # daemon: the child must not outlive the handler process
        child.daemon = True
        child.start()
    except Exception as e:
        LOGGER_Handler.exception(f"Failed to start mlflow server. Exception {e}")


def cleanup_stale_models():
    """Delete mlflow experiments and Solr data of entities that no longer exist.

    An experiment is considered stale when its name is not "Default" and does
    not match ``<entityKey>_<entityId>`` of any ML-model entity (disabled ones
    included). Stale experiments are soft-deleted through mlflow, their Solr
    documents are removed, and finally everything inside the ``.trash`` folder
    of the tracking directory is removed permanently.

    The function re-schedules itself every 24 hours, whether or not the
    current run succeeded. Failures are logged, not raised.
    """
    # Imported here because the module's import block does not provide shutil.
    import shutil

    module_name = "CVEEHandler"
    func_name = "cleanup_stale_models"
    func_str = "{}::{}() - ".format(module_name, func_name)

    solrTaggerPort = loadRegValue(SOLR_TAGGER_PORT, 22000)
    SOLR_URL = "http://localhost:{}/solr".format(solrTaggerPort)
    CLEANUP_STALE_MODELS_TIME = 24 * 60 * 60  # 24 hours
    DEFAULT_TRACKING_URI = str(Path.cwd()) + str(
        Path("\\..\\ContentAnalyzer\\bin\\classifier_models\\custom_trained_models\\mlruns")
    )
    # Named "logger" so the stdlib logging module is not shadowed.
    logger = LOGGER_Handler
    try:
        if not Path(DEFAULT_TRACKING_URI).exists():
            return
        from cvee_get_entities import get_entities, entities_mapping, EEEntityType
        from cvee_solr_helper import SolrHelper
        from mlflow.tracking import MlflowClient
        from mlflow.tracking.fluent import ViewType

        # get all the entities (including disabled) (non-cached)
        params = {
            "retry": True,
            "get_disabled": True,
            "ee_cache_token": str(int(time.time())),
        }
        entity_details_resp = get_entities(params)
        ml_entities = entities_mapping(entity_details_resp, entityType=EEEntityType.ML_MODEL.value)
        # all experiment names that belong to an existing entity
        possible_experiments = {
            f"{entity_detail.entityKey}_{entity_detail.entityId}"
            for entity_detail in ml_entities.values()
        }
        mlflow_client = MlflowClient("file://" + DEFAULT_TRACKING_URI.replace("\\", "/"))
        # get all available experiments (active + inactive)
        experiments = mlflow_client.list_experiments(ViewType.ALL)
        datasets_solr_helper = SolrHelper(logger, SOLR_URL, "datasets")
        datasets_info_solr_helper = SolrHelper(logger, SOLR_URL, "datasets_info")
        for exp in experiments:
            experiment_name = None
            try:
                experiment_name = exp.name
                if experiment_name in possible_experiments or experiment_name == "Default":
                    continue
                # the experiment name is "<dataset name>_<entity id>"
                entity_id = int(experiment_name.split("_")[-1])
                dataset_name = "_".join(experiment_name.split("_")[:-1])
                logger.info(
                    f"Starting clean up for entity [{dataset_name}] with ID [{entity_id}]",
                    func_str,
                )
                # if experiment exists but entity got deleted
                # delete experiment (this is soft delete, it will be clear later from .trash folder)
                mlflow_client.delete_experiment(exp.experiment_id)
                logger.info(f"Successfully deleted experiment [{experiment_name}]", func_str)
                # clean up solr data for deleted entity
                datasets_solr_helper.delete(f"entity_id:{entity_id}")
                datasets_info_solr_helper.delete(f"dataset_id:{dataset_name}")
                logger.info(f"Successfully deleted solr data for [{dataset_name}]", func_str)
            except Exception as e:
                logger.error(
                    f"Failed to cleanup existing model [{experiment_name}] ID [{exp.experiment_id}]. Exception {e}",
                    func_str,
                )

        # try cleaning trash folder for permanent deletion of models
        trash_path = Path(DEFAULT_TRACKING_URI) / ".trash"
        if trash_path.exists():
            # Only the direct children are visited: a recursive glob would also
            # yield paths inside directories rmtree has just removed, and
            # rmtree cannot delete plain files.
            for deleted_model in list(trash_path.iterdir()):
                try:
                    logger.info(
                        f"Attempting permanent delete for experiment [{str(deleted_model)}]",
                        func_str,
                    )
                    if deleted_model.is_dir():
                        shutil.rmtree(str(deleted_model))
                    else:
                        deleted_model.unlink()
                    logger.info(
                        f"Permanent delete successful for experiment [{str(deleted_model)}]",
                        func_str,
                    )
                except Exception as e:
                    logger.error(
                        f"Unable to clean up [{str(deleted_model)}]. Exception {e}", func_str
                    )
    except Exception as e:
        logger.exception(f"Failed to cleanup existing models. Exception {e}", func_str)
    finally:
        # threading.Timer: the bare name Timer is not imported by this module,
        # and a NameError here would escape into the caller.
        threading.Timer(CLEANUP_STALE_MODELS_TIME, cleanup_stale_models).start()


def resume_training():
    """Restart classifier trainings that were left in the TRAINING_STARTED state.

    For every enabled ML-model entity whose training status is
    TRAINING_STARTED, the matching mlflow experiment
    (``<entityKey>_<entityId>``) is looked up; when the single run returned by
    the search is still marked "RUNNING", train_classifier() is invoked for
    that entity. Failures are logged, not raised.
    """
    logging = LOGGER_Handler
    try:
        from cvee_get_entities import (
            get_entities,
            entities_mapping,
            EEEntityType,
            ModelTrainingStatus,
        )

        # get all the entities (non-cached)
        entity_details_resp = get_entities(
            {
                "retry": True,
                "get_disabled": False,
                "ee_cache_token": str(int(time.time())),
            }
        )
        ml_entities = entities_mapping(entity_details_resp, entityType=EEEntityType.ML_MODEL.value)
        if len(ml_entities) == 0:
            return
        from mlflow import get_experiment_by_name
        from mlflow.tracking.fluent import search_runs, ViewType
        from cvee_train_classifier import train_classifier

        for entity_detail in ml_entities.values():
            # only entities that are in training started mode are of interest
            classifier_details = entity_detail.entityXML.classifierDetails
            if classifier_details is None:
                continue
            if str(classifier_details.trainingStatus) != str(
                ModelTrainingStatus.TRAINING_STARTED.value
            ):
                continue

            # the entity must be local to this Content Analyzer for retraining
            dataset_name = entity_detail.entityKey
            experiment = get_experiment_by_name(f"{dataset_name}_{entity_detail.entityId}")
            if experiment is None or type(experiment) == str:
                continue
            logging.debug(f"Existing experiment found for dataset {dataset_name}")

            # fetch one run of the experiment
            runs = search_runs(
                experiment_ids=[str(experiment.experiment_id)],
                filter_string="",
                run_view_type=ViewType.ALL,
                max_results=1,
            )
            if not len(runs) > 0:
                continue
            run_info = runs.iloc[0].to_dict()
            if run_info["status"] == "RUNNING":
                logging.info(
                    f"Attempting retraining for entity [{entity_detail.entityName}] with entity Id [{entity_detail.entityId}]"
                )
                train_classifier(entity_detail.entityId, dataset_name)
    except Exception as e:
        logging.exception(f"Failed to resume failed trainings. Exception [{e}]")


def startEE():
    """Start every Entity Extraction process and the periodic supervisors.

    In order: loads the entities (marking the Commserver unreachable on
    failure), starts publisher and clients with their restart checks,
    schedules the periodic client stop command, starts the generic clients
    and their restart check, starts the mlflow server, resumes interrupted
    trainings, cleans up stale models, starts the profiler thread when
    profiling is enabled and starts the Commserver connectivity check.

    Returns:
        The publisher, client and generic start responses joined by spaces.
    """
    global PROFILER, ENTITIES_ATTRIBUTES
    module_name = "CVEEHandler"
    func_name = "startEE"
    # func_str was previously undefined here, so the except branch below
    # raised NameError instead of logging the failure.
    func_str = "{}::{}() - ".format(module_name, func_name)
    try:
        ENTITIES_ATTRIBUTES = loadEntities(
            COMMSERVER_REACHABLE=COMMSERVER_REACHABLE, logger=LOGGER_Handler
        )
    except Exception:
        COMMSERVER_REACHABLE.value = 0
        LOGGER_Handler.exception(
            "Failed to get the entities from database. Please verify that Commserver is reachable.",
            func_str,
        )
    publisherResponse = startEEPublisher()
    clientResponse = startEEClient()
    checkAndRestartClients()
    checkAndRestartPublisher()
    threading.Timer(PROCESS_RESTART_TIME, putStopCommand).start()

    genericResponse = startGenericEE()
    checkAndRestartGenericClients()
    start_mlflow_server()
    # get previous ML entities which are still in TRAINING_STARTED, retrain these classifiers
    resume_training()
    # cleanup previous deleted classifiers
    cleanup_stale_models()
    if REG_CONF["profiling"]["value"]:
        PROFILER = ProfilerReader()
        PROFILER.start()
    checkIfCommserverAvailable()
    return publisherResponse + " " + clientResponse + " " + genericResponse


def stopEE():
    """Stop the profiler thread (if any), the publisher and all clients.

    Returns:
        The string "Processes stopped successfully".
    """
    module_name = "CVEEHandler"
    func_name = "stopEE"
    func_str = "{}::{}() - ".format(module_name, func_name)
    # PROFILER stays None until startEE() has created the thread; without the
    # None check a stop before/without a successful start raised AttributeError.
    if REG_CONF["profiling"]["value"] and PROFILER is not None:
        PROFILER.stop()
    LOGGER_Handler.info("Stop command received. Stopping Entity Extraction", func_str)
    stopPublisher()
    stopClients()
    stopGenericEE()
    LOGGER_Handler.info("Stopped Entity Extraction processes", func_str)
    return "Processes stopped successfully"


def spawn_client(process_index, shared_tasks=None):
    """Create and start the generic client process for slot ``process_index``.

    A fresh command queue and process are stored in ``cmdQG`` and
    ``eeGenericProcess`` at that index, and ``shared_tasks`` is remembered in
    ``process_task_index_mapping`` so the client can be respawned later.

    Args:
        process_index: zero-based slot; the client id is ``process_index + 1``.
        shared_tasks: list of task names handled by the client. When None,
            nothing is started.
    """
    module_name = "CVEEHandler"  # was undefined here, which raised NameError
    func_name = "spawn_client"
    func_str = "{}::{}() - ".format(module_name, func_name)
    global eeGenericProcess, cmdQG, process_task_index_mapping

    import CvCAGenericClient as cl

    if shared_tasks is None:
        return
    cmdQG[process_index] = Queue()
    params = {
        "cmdQ": cmdQG[process_index],
        "conf": REG_CONF,
        "id": process_index + 1,
        "parent_pid": PROCESS_ID,
        "shared_tasks": shared_tasks,
        "commserver_reachable": COMMSERVER_REACHABLE,
    }
    process_task_index_mapping[process_index] = shared_tasks
    if "SP_NER" in shared_tasks:
        params["is_dummy_process"] = IS_DUMMY_NER_PROCESS
        if IS_DUMMY_NER_PROCESS is True:
            LOGGER_Handler.info(
                "Dummy process spawned for Named Entity Recognition because of insufficient system resources. Available Memory {} GB, Total CPU cores {}".format(
                    get_available_memory(), get_cpu_cores()
                ),
                func_str,
            )

    client_process = Process(target=cl.doProcessing, kwargs=params)
    eeGenericProcess[process_index] = client_process
    # Every client except a DOC_TAGGER one runs as a daemon process.
    if "DOC_TAGGER" not in shared_tasks:
        client_process.daemon = True
    client_process.start()
    LOGGER_Handler.info(
        "Generic Client {0} started. Process ID is {1}".format(
            process_index + 1, client_process.pid
        ),
        func_str,
    )


def startGenericEE():
    """Start the generic client processes unless they are already running.

    Clients are spawned in this order: RER/DE clients
    (``NO_GENERIC_PROCESSES - NO_SP_NER_PROCESSES``), SP_NER clients,
    DOC_TAGGER clients and one EMAIL_TAGGER client.

    Returns:
        A message with the PID of the first client, or, when the clients
        already exist, a message listing the PIDs of the running ones.
    """
    global eeGenericProcess, cmdQG, process_task_index_mapping

    if eeGenericProcess is not None and cmdQG is not None:
        ids = [str(p.pid) for p in eeGenericProcess if p is not None]
        return "Following Generic Processes already running: {}".format(",".join(ids))

    # Counts are clamped at 0: with more SP_NER than generic clients configured
    # the RER/DE count went negative, the lists below were sized too small and
    # spawning failed with IndexError.
    num_spner_clients = max(NO_SP_NER_PROCESSES, 0)
    num_rer_de_clients = max(NO_GENERIC_PROCESSES - NO_SP_NER_PROCESSES, 0)
    num_doc_tagger_clients = max(NUM_DOC_TAGGER_PROCESSES, 0)
    num_email_tagger_clients = 1

    spawn_plan = (
        (["RER", "DE"], num_rer_de_clients),
        (["SP_NER"], num_spner_clients),
        (["DOC_TAGGER"], num_doc_tagger_clients),
        (["EMAIL_TAGGER"], num_email_tagger_clients),
    )
    total_clients = sum(count for _, count in spawn_plan)

    eeGenericProcess = [None] * total_clients
    cmdQG = [None] * total_clients
    process_task_index_mapping = [None] * total_clients

    process_index = 0
    for tasks, count in spawn_plan:
        for _ in range(count):
            # each client gets its own list object, as before
            spawn_client(process_index, shared_tasks=list(tasks))
            process_index += 1

    return "RER Process started successfully: {}".format(eeGenericProcess[0].pid)


def stopGenericEE():
    """Ask every generic client to stop, wait 5 seconds, then terminate them.

    Returns:
        The string "Generic Processes stopped successfully".
    """
    global cmdQG, eeGenericProcess
    # Iterate over the lists themselves: they hold more entries than
    # NO_GENERIC_PROCESSES (DOC_TAGGER and EMAIL_TAGGER clients are appended by
    # startGenericEE), and those extra clients were never stopped before.
    if cmdQG is not None:
        for command_queue in cmdQG:
            if command_queue is not None:
                command_queue.put("stop")
    # grace period for the clients to act on the stop command
    time.sleep(5)
    if eeGenericProcess is not None:
        for client_process in eeGenericProcess:
            if client_process is not None:
                client_process.terminate()
    eeGenericProcess = None
    return "Generic Processes stopped successfully"


def putStopCommand():
    """Send "stop" to every client command queue and re-arm the timer.

    Nothing is sent, and the timer is not re-armed, while ``cmdQC`` is None.
    """
    global PROCESS_RESTART_TIME
    global cmdQC
    func_str = "{}::{}() - ".format("CVEEHandler", "putStopCommand")
    LOGGER_Handler.debug("Putting Stop Command", func_str)
    if cmdQC is None:
        return
    for command_queue in cmdQC:
        command_queue.put("stop")
    # run again after PROCESS_RESTART_TIME seconds
    threading.Timer(PROCESS_RESTART_TIME, putStopCommand).start()


def checkAndRestartClients():
    """Replace every dead client process and schedule the next check.

    A dead client gets a new command queue and a new daemon process running
    main_client. Nothing happens, and no further check is scheduled, while
    ``eeClientProcess`` is None.
    """
    global eeClientProcess, cmdQC, REG_CONF, PROCESS_ID, ENTITIES_ATTRIBUTES
    func_str = "{}::{}() - ".format("CVEEHandler", "checkAndRestartClients")
    LOGGER_Handler.debug("Checking if any client is dead", func_str)
    if eeClientProcess is None:
        return

    for slot, worker in enumerate(eeClientProcess):
        if worker.is_alive():
            continue
        cmdQC[slot] = Queue()
        replacement = Process(
            target=main_client,
            args=(cmdQC[slot], REG_CONF, slot + 1, PROCESS_ID, ENTITIES_ATTRIBUTES),
        )
        eeClientProcess[slot] = replacement
        replacement.daemon = True
        replacement.start()
        LOGGER_Handler.info(
            "Restarted Client process {0}. Process ID is {1}".format(slot + 1, replacement.pid),
            func_str,
        )

    # check again after CLIENTS_CHECK_TIMER seconds
    threading.Timer(CLIENTS_CHECK_TIMER, checkAndRestartClients).start()


def checkAndRestartGenericClients():
    """Respawn any dead generic client process and schedule the next check.

    Does nothing (and does not re-arm the timer) when no generic pool exists.
    Otherwise every tracked process that is no longer alive is respawned via
    ``spawn_client`` using its entry in ``process_task_index_mapping``.

    A failure while handling one process is logged and does not stop the
    remaining ones from being checked. The next check is scheduled in a
    ``finally`` block, because an exception escaping a timer thread would
    otherwise end the monitoring chain for good.
    """
    func_str = "{}::{}() - ".format("CVEEHandler", "checkAndRestartGenericClients")
    LOGGER_Handler.debug("Checking if any generic client is dead", func_str)
    if eeGenericProcess is None:
        return
    try:
        # NOTE(review): `cl` is not used in this function; the import is kept
        # in case importing the module has side effects - confirm and remove.
        import CvCAGenericClient as cl

        for i in range(len(eeGenericProcess)):
            try:
                if eeGenericProcess[i].is_alive():
                    continue
                spawn_client(i, process_task_index_mapping[i])
                LOGGER_Handler.info(
                    "Restarted Generic Client process {0}. Process ID is {1}".format(
                        i + 1, eeGenericProcess[i].pid
                    ),
                    func_str,
                )
            except Exception:
                LOGGER_Handler.exception(
                    "Exception while restarting Generic Client process {0}".format(i + 1),
                    func_str,
                )
    finally:
        threading.Timer(GENERIC_CLIENTS_CHECK_TIMER, checkAndRestartGenericClients).start()


def checkIfCommserverAvailable():
    """Periodically call ``loadEntities`` in retry mode.

    ``loadEntities`` is given the shared ``COMMSERVER_REACHABLE`` value and
    the handler logger; presumably it updates that flag according to whether
    the CommServer answered - confirm against ``loadEntities``.

    Failures are logged instead of being silently discarded. The next run is
    scheduled in a ``finally`` block so the periodic check continues whatever
    happens during this one.
    """
    func_str = "{}::{}() - ".format("CVEEHandler", "checkIfCommserverAvailable")
    try:
        loadEntities(retry=True, COMMSERVER_REACHABLE=COMMSERVER_REACHABLE, logger=LOGGER_Handler)
    except Exception:
        # Best effort: a failed check must not stop the handler, but it
        # should leave a trace in the log.
        LOGGER_Handler.exception("Exception while checking CommServer connectivity", func_str)
    finally:
        threading.Timer(CHECK_COMMSERVER_CONNECTIVITY, checkIfCommserverAvailable).start()


def checkAndRestartPublisher():
    """Respawn the publisher process if it has died, then schedule the next check.

    A publisher that was never started (``eePublisherProcess`` is ``None``)
    is left alone. Any exception raised during the check is logged. The timer
    is re-armed after every run, whatever the outcome.
    """
    global eePublisherProcess, cmdQP
    func_str = "{}::{}() - ".format("CVEEHandler", "checkAndRestartPublisher")
    try:
        LOGGER_Handler.debug("Checking if Publisher is dead", func_str)
        publisher_died = eePublisherProcess is not None and not eePublisherProcess.is_alive()
        if publisher_died:
            # The replacement publisher gets its own fresh command queue.
            cmdQP = Queue()
            eePublisherProcess = Process(
                target=main_publisher,
                args=(cmdQP, REG_CONF, PROCESS_ID),
            )
            eePublisherProcess.daemon = True
            eePublisherProcess.start()
            restart_message = "Restarted Publisher process. Process ID is {}".format(
                eePublisherProcess.pid
            )
            LOGGER_Handler.info(restart_message, func_str)
    except Exception:
        LOGGER_Handler.exception("Exception while restarting publisher", func_str)

    next_check = threading.Timer(PUBLISHER_SLEEP_TIMER, checkAndRestartPublisher)
    next_check.start()


def stopPublisher():
    """Stop the publisher process, if one is tracked, and forget it.

    Sends "stop" on the publisher command queue, waits five seconds, then
    terminates the process. ``eePublisherProcess`` is reset to ``None`` in
    every case.
    """
    global eePublisherProcess
    publisher_tracked = eePublisherProcess is not None
    if publisher_tracked:
        cmdQP.put("stop")
        # Pause between the "stop" command and the forced terminate.
        time.sleep(5)
        eePublisherProcess.terminate()
    eePublisherProcess = None


def stopClients():
    """Stop all client processes and clear the client pool.

    When both the pool and its command queues exist, a "stop" command is put
    on every queue, then after five seconds every process is terminated.
    ``eeClientProcess`` is reset to ``None`` in every case.

    The lists are walked directly instead of being indexed by
    ``NO_PROCESSES``, so a configured count that differs from the pool size
    cannot raise ``IndexError`` or leave clients running. ``None`` slots are
    skipped: ``startEEClient`` pre-fills both lists with ``None``, and a
    crash on such a slot would leave the pool marked as running forever.
    """
    global eeClientProcess
    if eeClientProcess is not None and cmdQC is not None:
        for command_queue in cmdQC:
            if command_queue is not None:
                command_queue.put("stop")
        # Pause between the "stop" command and the forced terminate.
        time.sleep(5)
        for process in eeClientProcess:
            if process is not None:
                process.terminate()

    eeClientProcess = None


def wakeEE():
    """Restart the engine: stop it, wait ten seconds, start it again.

    Returns:
        str: A fixed confirmation message.
    """
    pause_before_start = 10  # seconds between stopEE() and startEE()
    stopEE()
    time.sleep(pause_before_start)
    startEE()
    return "Process awoken successfully"


def startEEClient():
    """Start the pool of client processes unless one already exists.

    Creates ``NO_PROCESSES`` daemon processes running ``main_client``, each
    with its own command queue, and records their PIDs in the module-level
    ``pids`` string (comma-terminated).

    Returns:
        str: A status message listing the client PIDs, either freshly
        started or already running.
    """
    global eeClientProcess, cmdQC, pids
    func_str = "{}::{}() - ".format("CVEEHandler", "startEEClient")

    if eeClientProcess is not None:
        return "Client Process is already running: %s" % pids[:-1]

    pids = ""
    eeClientProcess = [None] * NO_PROCESSES
    cmdQC = [None] * NO_PROCESSES
    for slot in range(NO_PROCESSES):
        client_number = slot + 1
        cmdQC[slot] = Queue()
        eeClientProcess[slot] = Process(
            target=main_client,
            args=(cmdQC[slot], REG_CONF, client_number, PROCESS_ID, ENTITIES_ATTRIBUTES),
        )
        eeClientProcess[slot].daemon = True
        eeClientProcess[slot].start()
        started_message = "Client {0} started. Process ID is {1}".format(
            client_number, eeClientProcess[slot].pid
        )
        LOGGER_Handler.info(started_message, func_str)
        # Each PID is followed by a comma; the trailing one is dropped on return.
        pids = pids + str(eeClientProcess[slot].pid) + ","
    return "Client Process started successfully: %s" % pids[:-1]


def startEEPublisher():
    """Start the publisher process unless one is already tracked.

    Creates a fresh command queue and a daemon process running
    ``main_publisher``.

    Returns:
        str: A status message containing the publisher PID, either freshly
        started or already running.
    """
    global eePublisherProcess, cmdQP
    func_str = "{}::{}() - ".format("CVEEHandler", "startEEPublisher")

    if eePublisherProcess is not None:
        return "Publisher Process is already running: %d" % eePublisherProcess.pid

    cmdQP = Queue()
    eePublisherProcess = Process(
        target=main_publisher,
        args=(cmdQP, REG_CONF, PROCESS_ID),
    )
    eePublisherProcess.daemon = True
    eePublisherProcess.start()
    started_message = "Publisher process started. Process id is {}".format(
        eePublisherProcess.pid
    )
    LOGGER_Handler.info(started_message, func_str)
    return "Publisher Process started successfully: %d" % eePublisherProcess.pid


def wakeUpEEPublisher():
    """Make sure the publisher is started, then send it a "wakeup" command.

    Returns:
        str: A fixed confirmation message.
    """
    # startEEPublisher() creates the publisher and its queue when absent.
    startEEPublisher()
    wake_command = "wakeup"
    cmdQP.put(wake_command)
    return "Process awoken successfully"


def ping():
    """Return the handler's fixed liveness message."""
    alive_message = "I am alive From CV Entity Extraction Handler"
    return alive_message


# Running this module as a script starts the handler process. The first
# command-line argument must be the parent process ID.
if __name__ == "__main__":
    module_name = "CVEEHandler"
    func_name = "__main__"
    func_str = f"{module_name}::{func_name}() - "
    if len(sys.argv) >= 2:
        try:
            parent_pid = int(sys.argv[1])
        except Exception as e:
            print(f"Unable to convert PID to int: {e}")
            sys.exit()
        current_pid = os.getpid()

        # Start-up order matters: parent check, registry values, logging,
        # and only then the command handler.
        checkParentAndKill(parent_pid, current_pid)
        loadRegistryValues()
        initLogging()
        cmdHandler = CommandHandler()
        LOGGER_Handler.info(
            "Handler process started. It will exit if Message Queue is down.",
            func_str,
        )
        try:
            cmdHandler.run()
        except KeyboardInterrupt:
            cmdHandler.stop()
        except Exception as e:
            LOGGER_Handler.error(f"Exception occurred. {e}", func_str)
            cmdHandler.stop()
        LOGGER_Handler.info("Handler process stopped successfully.", func_str)
        killProcessAndChildren(current_pid)
    else:
        print("Please provide process ID")
