import os
import sys

import re2
import ujson
from CvCAProximityConf import ProximityBasedConfidence
from CvEEConfigHelper import (
    CA_ERROR_CODES,
    WORD_BOUNDARY_END,
    WORD_BOUNDARY_START,
    cleanRawText,    
    getBaseDir,
    HELPER_DLL,
    ROTATING_BACKUP_COUNT,
    ROTATING_MAX_BYTES,
)
from CvCAGenericLogger import get_logger_handler
from CvCAPerfCounters import PerformanceCounter

# Options forwarded to the shared logger factory; the values come straight
# from the ROTATING_* constants in CvEEConfigHelper.
logger_options = dict(
    ROTATING_BACKUP_COUNT=ROTATING_BACKUP_COUNT,
    ROTATING_MAX_BYTES=ROTATING_MAX_BYTES,
)

# Module-wide logger whose target path is built from the base directory and
# HELPER_DLL, registered under the "ContentAnalyzer" name.
logger = get_logger_handler(
    os.path.join(getBaseDir(), HELPER_DLL),
    "ContentAnalyzer",
    logger_options,
)

# Shared performance counter that reports through the logger above.
# NOTE(review): periodicLogTimer() presumably schedules recurring perf
# logging at import time -- confirm against CvCAPerfCounters.
PERF_COUNTER = PerformanceCounter(logger)
PERF_COUNTER.periodicLogTimer()

def preProcess(params={}):
    """Return the success status dict; no pre-processing work is done here.

    Args:
        params: Accepted for interface compatibility; not read or modified.

    Returns:
        dict: ``{"ErrorCode": CA_ERROR_CODES["success"]}``.
    """
    success_code = CA_ERROR_CODES["success"]
    return {"ErrorCode": success_code}


def escapeSpecialChars(entity):
    """Backslash-escape regex metacharacters in *entity*.

    Only the characters ``[ ] \\ ^ $ . | ? * + ( ) { }`` are escaped; every
    other character is copied through unchanged.

    Args:
        entity: The literal text to be embedded in a regular expression.

    Returns:
        str: *entity* with each listed metacharacter prefixed by a backslash.
    """
    charlist = "[]\\^$.|?*+(){}"
    # Build the result in a single join instead of repeated string `+=`.
    return "".join("\\" + c if c in charlist else c for c in entity)


def _load_parent_content(params, log):
    """Build the text to analyse from the file named by ``params["parent_tmp_file"]``.

    The file is read as UTF-8 and passed through ``cleanRawText``. If the
    cleaned text parses as JSON with a ``dataList.optTypedata`` list, the
    ``attrValue`` of every item whose ``attrKey`` is one of the requested
    extraction fields is concatenated (each preceded by its ``attrKey`` line
    unless the key is ``"content"``). If that parsing fails for any reason,
    the cleaned raw text is returned instead.

    ``params`` is only read here; it is never modified, so the same dict can
    safely be reused across calls.
    """
    with open(params["parent_tmp_file"], encoding="utf-8") as fp:
        json_content = cleanRawText(fp.read())

    fields = params.get("EntityExtractionFields")
    if fields is None:
        fields = ["content"]
    elif isinstance(fields, str):
        # Comma-separated list; surrounding commas are ignored.
        fields = fields.strip(",").split(",")
    # Any other value (e.g. an already-split list) is used as given.

    try:
        jsonified_input = ujson.loads(json_content)
        parts = []
        for item in jsonified_input["dataList"]["optTypedata"]:
            if (
                "attrKey" in item
                and "attrValue" in item
                and item["attrKey"] in fields
            ):
                if item["attrKey"] != "content":
                    parts.append(f"{item['attrKey']}{os.linesep}")
                parts.append(f"{item['attrValue']}{os.linesep}")
        return "".join(parts)
    except Exception as e:
        # Best effort: anything that is not the expected JSON layout is
        # analysed as plain text.
        log.info(f"Exception occurred {e}")
        return json_content


def _collect_input_entities(processing_input):
    """Return a flat ``{entity_key: values}`` dict from the upstream result.

    Status keys (``ErrorCode`` / ``ErrorMessage``) are dropped. When the
    input is grouped under bucket keys (``"0"``, ``"1"`` or ``"2"`` present),
    the entries of all top-level groups are merged into one dict, later
    groups overriding earlier ones; otherwise a shallow copy is returned.
    """
    processing_input.pop("ErrorCode", None)
    processing_input.pop("ErrorMessage", None)

    if not any(bucket in processing_input for bucket in ("0", "1", "2")):
        return processing_input.copy()

    flattened = {}
    for bucket_entities in processing_input.values():
        for entity_key, values in bucket_entities.items():
            flattened[entity_key] = values
    return flattened


def doAnalysis(processing_input, params=None):
    """Run derived-entity extraction and return the formatted results.

    For every parent entity present in *processing_input*, each derived
    entity listed under it in ``entities_parents`` is searched for in the
    content via ``ProximityBasedConfidence.proximalMatch``, using the parent's
    already-detected values (regex-escaped) and the derived entity's keywords.
    Matches whose confidence exceeds the configured ``base_conf`` are kept.

    Args:
        processing_input: JSON string with the entities found upstream,
            either flat (``{entity_key: [values]}``) or grouped under the
            bucket keys ``"0"``/``"1"``/``"2"``.
        params: Dict of settings. Keys read here:
            ``entities_attributes`` (required; provides ``entities_keys``,
            ``entities_names``, ``entities_parents``, ``entities_keywords``
            and ``de_range``), ``parent_tmp_file`` or ``content`` (text
            source), ``EntityExtractionFields``, ``proximity_conf``,
            ``childDocs`` and ``logger``. When ``logger`` is absent the
            module-level logger is used.

    Returns:
        dict: If ``childDocs`` is truthy, results grouped by bucket
        (``{0: {...}, 1: {...}, 2: {...}}``); otherwise a single dict mapping
        each derived-entity key to its list of matched texts. Both forms
        carry ``ErrorCode`` and ``ErrorMessage``. On any failure a dict with
        ``ErrorCode`` set to ``CA_ERROR_CODES["SpacyError"]`` and a
        descriptive ``ErrorMessage`` is returned instead of raising.
    """
    global ENTITIES_PARENTS, DE_RANGE, ENTITIES_KEYWORDS, ENTITIES_NAMES, ENTITIES_KEYS

    if params is None:
        params = {}
    # Fall back to the module logger so the error paths below can always
    # report, even when the caller did not supply one.
    log = params.get("logger", logger)

    try:
        attributes = params["entities_attributes"]
        ENTITIES_KEYS = attributes["entities_keys"]
        ENTITIES_NAMES = attributes["entities_names"]
        ENTITIES_PARENTS = attributes["entities_parents"]
        ENTITIES_KEYWORDS = attributes["entities_keywords"]
        DE_RANGE = attributes["de_range"]

        processing_input = ujson.loads(processing_input)

        content = ""
        if "parent_tmp_file" in params:
            content = _load_parent_content(params, log)
        elif "content" in params:
            content = params["content"]

        prox_conf_params = params["proximity_conf"] if "proximity_conf" in params else {}
        childDocs = params["childDocs"] if "childDocs" in params else False

        proxConf = ProximityBasedConfidence(prox_conf_params)

        input_entities = _collect_input_entities(processing_input)

        # Case-insensitive view of ENTITIES_KEYS (string keys only).
        lowerCaseKeys = {
            k.lower(): v for k, v in ENTITIES_KEYS.items() if isinstance(k, str)
        }

        # Resolve each incoming entity through ENTITIES_NAMES first, then
        # through the lower-cased ENTITIES_KEYS; unknown ones are ignored.
        response_entities = []
        for k in input_entities:
            lowered = k.lower()
            if lowered in ENTITIES_NAMES:
                response_entities.append(ENTITIES_NAMES[lowered])
            elif lowered in lowerCaseKeys:
                response_entities.append(lowerCaseKeys[lowered])

        # Only entities that are parents of some derived entity matter here.
        entities_to_process = set(response_entities).intersection(set(ENTITIES_PARENTS))

        entities = {}
        results = {0: {}, 1: {}, 2: {}}
        merged_results = {}
        # Per derived-entity key: lower-cased text -> first-seen casing, so
        # matches differing only in case are reported with one spelling.
        entities_dict = {}
        for parent in entities_to_process:
            for ent_ in ENTITIES_PARENTS[parent]:
                ent_key = ENTITIES_KEYS[ent_]
                PERF_COUNTER.start_stopwatch(ent_key)
                if ent_ in DE_RANGE:
                    # NOTE(review): this override is not reset afterwards, so
                    # it appears to stay in effect for later entities that
                    # have no DE_RANGE entry -- confirm this is intended.
                    proxConf.PROXIMITY_CONSTANTS["range"] = DE_RANGE[ent_]

                if ent_ in ENTITIES_KEYWORDS:
                    stringified_keywords = "|".join(ENTITIES_KEYWORDS[ent_])
                else:
                    stringified_keywords = ""

                keywords_regex = (
                    WORD_BOUNDARY_START + "(?:(?:{}))" + WORD_BOUNDARY_END
                ).format(stringified_keywords)

                # The parent's detected values are matched literally, so any
                # regex metacharacters in them are escaped first.
                pattern_found = [
                    escapeSpecialChars(r)
                    for r in input_entities[ENTITIES_KEYS[parent]]
                ]

                entities = proxConf.proximalMatch(
                    pattern_found, keywords_regex, content, ent_key
                )

                # To handle multiple occurrences of an entity, the match keys
                # are tuples rather than the plain entity text; the text is
                # the first element. (This change dates from SP13.)
                for entity_tuple, v in entities.items():
                    k = entity_tuple[0]
                    if v["conf"] > proxConf.PROXIMITY_CONSTANTS["base_conf"]:
                        bucket = v["bucket"]
                        if ent_key not in results[bucket]:
                            results[bucket][ent_key] = []
                        if ent_key not in merged_results:
                            merged_results[ent_key] = []
                            entities_dict[ent_key] = {}

                        canonical = entities_dict[ent_key].setdefault(k.lower(), k)
                        results[bucket][ent_key].append(canonical)
                        merged_results[ent_key].append(canonical)
                PERF_COUNTER.stop_stopwatch(
                    ent_key, size=len(content), entity_count=len(entities)
                )

        results["ErrorCode"] = CA_ERROR_CODES["success"]
        results["ErrorMessage"] = None
        for key in ["ErrorCode", "ErrorMessage"]:
            merged_results[key] = results[key]

        if childDocs:
            return results
        return merged_results

    except Exception as e:
        log.exception("There was an error during DE")
        return {
            "ErrorCode": CA_ERROR_CODES["SpacyError"],
            "ErrorMessage": "Derived entities failure: {}".format(e),
        }


if __name__ == "__main__":

    # Manual smoke run: feed a sample sentence through doAnalysis using the
    # dict returned by preProcess() as its params, and print the outcome.
    params = preProcess()
    sample_text = "Hello. I am Rob, and I work in XYZ Inc."
    analysis_result = doAnalysis(sample_text, params)
    print(analysis_result)
