import bisect
import re2, re

from collections import Counter, namedtuple
from functools import lru_cache

from flashtext import KeywordProcessor

from CvEEConfigHelper import custom_trim

# A piece of text paired with a count.
EntityCount = namedtuple("EntityCount", ["text", "count"])

# EntityCount's fields prefixed with an ``is_entity`` flag; findSpans uses the
# flag to decide whether an item's span is stored on a match or collected as a
# keyword span.
EntityList = namedtuple("EntityList", ["is_entity", *EntityCount._fields])

@lru_cache()
def get_pattern_keyword_regex(pattern_regex, keywords_regex):
    """Compile the pattern and keyword expressions with ``re2``.

    Results are memoised by ``lru_cache`` on the pair of arguments, so both
    arguments must be hashable (e.g. plain strings).

    Args:
        pattern_regex: source of the entity pattern expression.
        keywords_regex: source of the keyword expression.

    Returns:
        A ``(compiled_pattern, compiled_keywords)`` tuple; both are compiled
        with the DOTALL, IGNORECASE and MULTILINE flags.
    """
    flags = re.DOTALL | re.IGNORECASE | re.MULTILINE
    compiled_pattern = re2.compile(pattern_regex, flags)
    compiled_keywords = re2.compile(keywords_regex, flags)
    return compiled_pattern, compiled_keywords

class ProximityBasedConfidence:
    def __init__(self, extra_params=None):
        """Set up the proximity-scoring constants.

        Args:
            extra_params: optional mapping whose entries override (or extend)
                the default ``PROXIMITY_CONSTANTS``. ``None`` or an empty
                mapping keeps the defaults.
        """
        self.PROXIMITY_CONSTANTS = {
            # Confidence assigned to a non-strict match before any boost.
            "base_conf": 50,
            # Confidence of strict matches, and the cap for boosted matches.
            "upper_conf": 90,
            # Default keyword-to-entity distance considered "close".
            "range": 300,
            # Number of close keywords after which confidence is capped.
            "max_matches": 3,
            # Multiplier applied to the 0-100 closeness score of a keyword.
            "boost": 0.25,
            # Confidence threshold -> bucket id, consumed by getBucket.
            "buckets": {50: 0, 60: 1, 70: 2},
        }
        # A None default (instead of a shared ``{}``) avoids the mutable
        # default argument pitfall and lets callers pass None explicitly.
        if extra_params:
            self.PROXIMITY_CONSTANTS.update(extra_params)

        # These will be populated by default by the parent and
        # can be utilised directly in this class
        self.perfCounter = None
        self.logger = None

    def getBucket(self, conf):
        """Map a confidence value to its bucket id.

        The thresholds are the keys of ``PROXIMITY_CONSTANTS["buckets"]``.
        They are visited from the numerically largest to the smallest and the
        bucket of the first threshold that ``conf`` strictly exceeds is
        returned. When ``conf`` exceeds none of them, the bucket of the
        smallest threshold is returned.

        Args:
            conf: numeric confidence value.

        Returns:
            The bucket value stored for the selected threshold.
        """
        buckets = self.PROXIMITY_CONSTANTS["buckets"]
        # Order by numeric value: the comparison below coerces keys with
        # int(), so keys supplied as strings must not be ordered
        # lexicographically (where "100" would sort below "60").
        thresholds = sorted(buckets, key=int, reverse=True)
        for threshold in thresholds:
            if conf > int(threshold):
                return buckets[threshold]
        return buckets[thresholds[-1]]

    def findSpans(self, text, patternID, list_entities, matches):
        # Function to iterate over the given list of entities
        # sequentially and find the span of each entity

        keywords = set()
        startIndex = 0
        for entity in list_entities:
            start = text.find(entity.text, startIndex)
            end = start + len(entity.text)
            if entity.is_entity:
                matches[(entity.text, entity.count)]["span"] = (start, end)
            else:
                keywords.add((start, end))
            startIndex = end + 1
        return keywords, matches
        
    
    
    def proximalMatch(self, pattern_regex, keywords_regex, text, patternID):
        """Find entity matches in ``text`` and score them by keyword proximity.

        Args:
            pattern_regex: either a regex source string, or a list of literal
                terms. A string is compiled through
                ``get_pattern_keyword_regex`` and only matches whose named
                group ``strict`` or ``loose`` participated are kept. A list
                is matched case-insensitively with flashtext's
                ``KeywordProcessor``.
            keywords_regex: regex source string for the supporting keywords.
            text: the text to scan.
            patternID: pattern identifier; ``"dob"`` uses a proximity range
                of 25 and ``"phone"`` a range of 50, anything else uses
                ``PROXIMITY_CONSTANTS["range"]``.

        Returns:
            The dict produced by ``findProximityConfidence``: keys are
            ``(match_text, start, end)`` tuples and values are dicts holding
            ``"strict"``, ``"conf"`` and ``"bucket"`` (plus
            ``"close_keywords"`` when a match was boosted).
        """
        if patternID == "dob":
            range_ = 25
        elif patternID == "phone":
            range_ = 50
        else:
            range_ = self.PROXIMITY_CONSTANTS["range"]

        # isinstance also accepts list subclasses, which could never take the
        # regex path anyway because lists are not hashable for the lru_cache.
        uses_term_list = isinstance(pattern_regex, list)
        if uses_term_list:
            # Only the keyword expression is needed; an empty pattern keeps
            # the cached helper's two-argument interface.
            _, keywords_regex = get_pattern_keyword_regex("", keywords_regex)
        else:
            pattern_regex, keywords_regex = get_pattern_keyword_regex(pattern_regex, keywords_regex)

        keywords = {match_.span() for match_ in keywords_regex.finditer(text)}

        base_conf = self.PROXIMITY_CONSTANTS["base_conf"]
        upper_conf = self.PROXIMITY_CONSTANTS["upper_conf"]
        matches = {}

        if uses_term_list:
            keyword_processor = KeywordProcessor(case_sensitive=False)
            keyword_processor.add_keywords_from_list(pattern_regex)
            terms_found = keyword_processor.extract_keywords(text, span_info=True)
            for keyword_, start, end in terms_found:
                # Literal terms are always treated as loose matches.
                matches[(keyword_, start, end)] = {
                    "strict": False,
                    "conf": base_conf,
                    "bucket": self.getBucket(base_conf),
                }
        else:
            for match_ in pattern_regex.finditer(text):
                output_dict = match_.groupdict()
                match_text = custom_trim(match_.group())
                # The key combines the trimmed text of the whole match with
                # the span of the named group that participated.
                if output_dict.get("strict") is not None:
                    matches[(match_text,) + match_.span("strict")] = {
                        "strict": True,
                        "conf": upper_conf,
                        "bucket": self.getBucket(upper_conf),
                    }
                elif output_dict.get("loose") is not None:
                    matches[(match_text,) + match_.span("loose")] = {
                        "strict": False,
                        "conf": base_conf,
                        "bucket": self.getBucket(base_conf),
                    }

        return self.findProximityConfidence(list(keywords), matches, range_)
    
    def findProximityConfidence(self, keywords, matches, range_):
        """Boost non-strict matches that have keywords within ``range_``.

        Distances are measured between the midpoint of a keyword span and the
        midpoint of the match span. Every keyword within ``range_`` adds
        ``(100 - distance * 100 / range_) * boost`` to the match confidence.
        Once ``max_matches`` close keywords were seen, or the confidence
        reaches ``upper_conf``, the confidence is pinned to ``upper_conf`` and
        the scan for that match stops. Strict matches are left untouched.

        Args:
            keywords: iterable of ``(start, end)`` keyword spans.
            matches: dict keyed by tuples whose items 1 and 2 are the match
                start and end; values are dicts with ``"strict"``, ``"conf"``
                and ``"bucket"``. Updated in place.
            range_: maximum midpoint distance for a keyword to count as close.

        Returns:
            The same ``matches`` dict; boosted entries also carry
            ``"close_keywords"``.
        """
        constants = self.PROXIMITY_CONSTANTS
        upper_conf = constants["upper_conf"]
        max_matches = constants["max_matches"]
        boost = constants["boost"]

        keywords = sorted(keywords)
        keywords_len = len(keywords)

        # Only the inner dicts are mutated, so iterating items() is safe.
        for span_info, details in matches.items():
            if details["strict"]:
                continue

            entity_start = span_info[1]
            entity_end = span_info[2]
            entity_position = (entity_start + entity_end) / 2.0

            # Jump to the first keyword that starts near the left edge of the
            # proximity window instead of scanning from the beginning.
            if entity_start > range_:
                first_candidate = bisect.bisect_left(
                    keywords, (entity_start - range_ - 1, entity_end - range_ - 1)
                )
            else:
                first_candidate = 0

            close_keywords = 0
            # Walk the whole window rather than a fixed handful of keywords:
            # keywords sitting just outside the left edge must not use up the
            # slots of later keywords that are actually in range.
            for keyword_index in range(first_candidate, keywords_len):
                k = keywords[keyword_index]
                if k[0] > entity_position + range_:
                    # Keywords are sorted by start and a midpoint is never
                    # left of its start, so nothing further can be in range.
                    break

                keyword_position = (k[0] + k[1]) / 2.0
                distance = abs(keyword_position - entity_position)
                if distance > range_:
                    continue

                percentage_closeness = (distance * 100.0) / range_
                inverse_percentage = 100.0 - percentage_closeness
                scale_to_boostunit = inverse_percentage * boost

                close_keywords += 1
                details["conf"] += scale_to_boostunit
                details["close_keywords"] = close_keywords
                details["bucket"] = self.getBucket(details["conf"])

                if close_keywords >= max_matches or details["conf"] >= upper_conf:
                    details["conf"] = upper_conf
                    details["bucket"] = self.getBucket(upper_conf)
                    break
        return matches

    
    def processCCNResult(self, result_set):
        """Classify a credit-card regex result by the group that matched.

        ``result_set`` holds 13 capturing-group values: group 0 marks a
        keyword hit, groups 1-6 are the loose card groups and groups 7-12 the
        strict card groups, each laid out as MasterCard, Visa, Visa marker,
        Amex, Discover, Diners Club. The first non-None group (in that order)
        decides the result.

        Args:
            result_set: iterable of exactly 13 group values (None when a
                group did not participate).

        Returns:
            ``"keyword"`` when group 0 matched; otherwise a
            ``(matched_text, card_type, is_strict)`` tuple; ``None`` when no
            group matched.

        Raises:
            ValueError: if ``result_set`` does not hold exactly 13 values.
        """
        groups = tuple(result_set)
        if len(groups) != 13:
            raise ValueError(
                "expected 13 capturing groups, got %d" % len(groups)
            )

        if groups[0] is not None:
            return "keyword"

        # The loose and strict halves share one layout, so walk them with the
        # same logic: loose groups start at index 1, strict ones at index 7.
        for offset, is_strict in ((1, False), (7, True)):
            mastercard, visa, visa_marker, amex, discover, diners = groups[offset:offset + 6]
            if mastercard is not None:
                return (mastercard, "MasterCard", is_strict)
            if visa is not None:
                # The marker group only distinguishes the two Visa variants;
                # it is ignored when the Visa group itself did not match.
                card_type = "Visa 1" if visa_marker is not None else "Visa 2"
                return (visa, card_type, is_strict)
            if amex is not None:
                return (amex, "Amex", is_strict)
            if discover is not None:
                return (discover, "Discover", is_strict)
            if diners is not None:
                return (diners, "Diners Club", is_strict)
        return None
