import os

from spacy.matcher import Matcher
from spacy.tokens import Token

from CvNERCustomModels import IsAddressClassifier


class AddressComponent(object):
    """
    Collect candidate address strings around matcher hits and store them on the doc.

    For every span end reported by the matcher, the component walks backwards
    through the preceding tokens to assemble a candidate address. Candidates
    accepted by the address classifier are appended to
    ``doc._.address_entities`` so that a later (filter) component can use them.

    Attributes:
        nlp: The language object handed to the constructor; only its ``vocab``
            is used here.
        matcher: spaCy ``Matcher`` holding the four "address" patterns.
        params: Opaque configuration forwarded to ``IsAddressClassifier``.
        filtered_addresses: Audit list; one entry per candidate for which the
            classifier returned a truthy value.
        added_addresses: Audit list; one entry per candidate for which the
            classifier returned a falsy value.

    NOTE(review): both audit lists grow for as long as the component lives;
    nothing in this class ever clears them.
    """

    # Shared by all instances: overwritten with a new IsAddressClassifier each
    # time an AddressComponent is constructed.
    is_address_classifier = ""

    # Maximum number of tokens inspected while walking back from a match end.
    MAX_LOOKBACK_TOKENS = 40

    # Coarse POS tags that may appear inside an address candidate.
    _ADDRESS_POS = ("PUNCT", "NUM", "PROPN", "SPACE", "NOUN", "SYM", "ADJ")

    # Punctuation allowed inside a candidate; any other PUNCT token ends it.
    _INLINE_PUNCT = (",", "\\", "/", "-")

    def __init__(self, nlp, params=None):
        """
        Build the matcher and the shared address classifier.

        Args:
            nlp: Object exposing ``vocab`` (used for the matcher and for label
                lookups in ``__call__``).
            params: Passed unchanged to ``IsAddressClassifier``.
        """
        self.nlp = nlp
        self.matcher = Matcher(nlp.vocab)
        self.params = params
        AddressComponent.is_address_classifier = IsAddressClassifier(params)
        self.filtered_addresses = []
        self.added_addresses = []
        # One add() call per pattern, all under the same "address" key.
        for pattern in self._build_patterns():
            self.matcher.add("address", None, pattern)

    @staticmethod
    def _build_patterns():
        """Return the four token patterns registered under the "address" key."""

        def location_run():
            # One or more consecutive tokens tagged as LOCATION entities.
            return {"ent_type": "LOCATION", "OP": "+"}

        def separator():
            # Optional whitespace / punctuation / whitespace between parts.
            return [
                {"POS": "SPACE", "OP": "?"},
                {"POS": "PUNCT", "OP": "?"},
                {"POS": "SPACE", "OP": "?"},
            ]

        def two_letter_code():
            # Exactly two upper-case ASCII characters.
            return {"LENGTH": 2, "IS_UPPER": True, "IS_ASCII": True}

        def number():
            return {"POS": "NUM"}

        # Fresh dicts are created for every pattern so no spec object is shared.
        return [
            [location_run()] + separator() + [two_letter_code()],
            [location_run()] + separator() + [number()],
            [location_run()] + separator() + [two_letter_code()] + separator() + [number()],
            [location_run()] + separator() + [location_run()],
        ]

    def __call__(self, doc):
        """
        Attach classifier-approved address candidates to ``doc``.

        When the matcher finds nothing the doc is returned untouched (the
        ``address_entities`` extension is not even registered). Otherwise the
        extension is registered if needed, its empty-string default is replaced
        by a per-doc list, and each distinct match end is evaluated once.

        Args:
            doc: The document to process.

        Returns:
            The same ``doc`` object.
        """
        matches = self.matcher(doc)
        if not matches:
            return doc

        strings = self.nlp.vocab.strings
        place_labels = (
            strings["GPE"],
            strings["LOCATION"],
            strings["FAC"],
            strings["NORP"],
            strings["ORG"],
        )
        person_label = strings["PERSON"]

        self._ensure_address_extension(doc)

        # Overlapping matches frequently share an end offset. The candidate
        # text depends only on that offset, so handling each end once avoids
        # duplicate entries and repeated classifier calls.
        seen_ends = set()
        for _, _, end in matches:
            if end in seen_ends:
                continue
            seen_ends.add(end)

            candidate = self._collect_address_text(doc, end, place_labels, person_label)
            # Single-word candidates are never submitted to the classifier.
            if candidate is None or len(candidate.split()) <= 1:
                continue

            # NOTE(review): candidates the classifier accepts are logged to
            # `filtered_addresses` with is_address=0 and rejected ones to
            # `added_addresses` with is_address=1. The names/flags look
            # inverted relative to the branch; kept as-is pending confirmation.
            if AddressComponent.is_address_classifier.filter_address(candidate):
                doc._.address_entities.append({"label_": "ADDRESS", "text": candidate})
                self.filtered_addresses.append({"Text": candidate, "is_address": 0})
            else:
                self.added_addresses.append({"Text": candidate, "is_address": 1})
        return doc

    @staticmethod
    def _ensure_address_extension(doc):
        """Register ``address_entities`` if missing and give ``doc`` its own list."""
        if not doc.has_extension("address_entities"):
            # The default is an immutable empty string; each doc gets its own
            # list below so results are never shared between docs.
            doc.set_extension("address_entities", default="")
        current = doc._.address_entities
        if isinstance(current, str) and current.strip() == "":
            doc._.address_entities = []

    def _collect_address_text(self, doc, end, place_labels, person_label):
        """
        Walk backwards from ``end`` and assemble the candidate address text.

        The candidate starts with the last matched token (``doc[end - 1]``) and
        grows leftwards. It is only returned when the walk is stopped by an
        explicit boundary: the word "address", a punctuation token outside
        ``_INLINE_PUNCT``, a token containing a line break, or a PERSON entity.

        Args:
            doc: The document being processed.
            end: Exclusive end offset of a match.
            place_labels: ``ent_type`` ids that may be part of an address.
            person_label: ``ent_type`` id of the PERSON label.

        Returns:
            The whitespace-normalised candidate, or ``None`` when no boundary
            was found within ``MAX_LOOKBACK_TOKENS`` tokens.
        """
        pieces = [doc[end - 1].text]
        index = end - 2
        scanned = 0
        hit_boundary = False

        # NOTE(review): the condition is `index > 0`, so the very first token of
        # the document is never inspected — confirm this is intended.
        while index > 0 and scanned < self.MAX_LOOKBACK_TOKENS:
            token = doc[index]
            text = token.text

            if text.lower() == "address":
                hit_boundary = True
                break

            if token.pos_ in self._ADDRESS_POS:
                if token.pos_ == "PUNCT" and text not in self._INLINE_PUNCT:
                    hit_boundary = True
                    break
                # Text held in memory uses "\n" for line breaks on every
                # platform (and "\r\n" contains it), so test for "\n" rather
                # than the platform-specific os.linesep.
                if "\n" in text:
                    hit_boundary = True
                    break
                pieces.append(text)
            elif (
                token.ent_type in place_labels
                # `== True` retained so non-bool extension values keep matching
                # exactly as before.
                or token._.is_location_gz == True  # noqa: E712
            ):
                pieces.append(text)
            elif token.ent_type == person_label:
                hit_boundary = True
                break
            else:
                # Unrecognised token: abandon the candidate.
                break

            scanned += 1
            index -= 1

        if not hit_boundary:
            return None

        # Tokens were gathered right-to-left; restore reading order, collapse
        # all whitespace runs (this also removes any line breaks) and re-attach
        # commas to the preceding word.
        joined = " ".join(" ".join(reversed(pieces)).split())
        return joined.replace(" ,", ",")
