import dataclasses
import os
import re
import textwrap
from typing import (
    Any,
    NotRequired,
    Union,
    Literal,
    TypedDict,
    Annotated,
    Self,
    cast,
)

from debian.deb822 import PkgRelation

from debputy._manifest_constants import (
    MK_INSTALLATION_SEARCH_DIRS,
    MK_BINARY_VERSION,
    MK_SERVICES,
)
from debputy.maintscript_snippet import DpkgMaintscriptHelperCommand, MaintscriptSnippet
from debputy.manifest_parser.base_types import (
    DebputyParsedContentStandardConditional,
    FileSystemExactMatchRule,
)
from debputy.manifest_parser.declarative_parser import ParserGenerator
from debputy.manifest_parser.exceptions import ManifestParseException
from debputy.manifest_parser.parse_hints import DebputyParseHint
from debputy.manifest_parser.parser_data import ParserContextData
from debputy.manifest_parser.tagging_types import DebputyParsedContent
from debputy.manifest_parser.util import AttributePath
from debputy.path_matcher import MatchRule, MATCH_ANYTHING, ExactFileSystemPath
from debputy.plugin.api import reference_documentation
from debputy.plugin.api.impl import (
    DebputyPluginInitializerProvider,
    ServiceDefinitionImpl,
)
from debputy.plugin.api.parser_tables import OPARSER_PACKAGES
from debputy.plugin.api.spec import (
    ServiceUpgradeRule,
    ServiceDefinition,
    DSD,
    INTEGRATION_MODE_DH_DEBPUTY_RRR,
    not_integrations,
)
from debputy.plugins.debputy.types import (
    BuiltUsingBase,
    BuiltUsing,
    StaticBuiltUsing,
    MatchedBuiltUsingRelation,
)
from debputy.transformation_rules import TransformationRule
from debputy.util import _error, manifest_format_doc

# Directories in which a clean-after-removal rule may use a glob (or an exact
# path) directly inside the directory itself, e.g. `/var/log/Xorg.*.log`.
ACCEPTABLE_CLEAN_ON_REMOVAL_FOR_GLOBS_AND_EXACT_MATCHES = frozenset({"./var/log"})


# Directories in which a clean-after-removal rule is accepted only when it is
# an exact path directly inside the directory (e.g. `/etc/inputrc`) or a
# pattern anchored in a subdirectory of it (e.g. `/etc/foo/*`).  A glob placed
# directly inside one of these directories (e.g. `/etc/*`) is rejected.
ACCEPTABLE_CLEAN_ON_REMOVAL_IF_EXACT_MATCH_OR_SUBDIR_OF = frozenset(
    {
        "./etc",
        "./run",
        "./var/lib",
        "./var/cache",
        "./var/backups",
        "./var/spool",
        # linux-image uses these paths with some `rm -f`
        "./usr/lib/modules",
        "./lib/modules",
        # udev special case
        "./lib/udev",
        "./usr/lib/udev",
        # pciutils deletes /usr/share/misc/pci.ids.<ext>
        "./usr/share/misc",
    }
)


def register_binary_package_rules(api: DebputyPluginInitializerProvider) -> None:
    """Register the manifest rules available under each `packages.<PKG>` entry.

    Every rule is registered via `api.pluggable_manifest_rule` against the
    `OPARSER_PACKAGES` parser table with `register_value=False` where the
    original registration specified it.  All rules except the binary version
    and `transformations` are disabled for the
    `INTEGRATION_MODE_DH_DEBPUTY_RRR` integration mode.

    :param api: The plugin initializer used to register the rules.
    """
    api.pluggable_manifest_rule(
        OPARSER_PACKAGES,
        MK_BINARY_VERSION,
        BinaryVersionParsedFormat,
        _parse_binary_version,
        source_format=str,
        register_value=False,
    )

    api.pluggable_manifest_rule(
        OPARSER_PACKAGES,
        "transformations",
        list[TransformationRule],
        _unpack_list,
        register_value=False,
    )

    api.pluggable_manifest_rule(
        OPARSER_PACKAGES,
        "conffile-management",
        list[DpkgMaintscriptHelperCommand],
        _unpack_list,
        expected_debputy_integration_mode=not_integrations(
            INTEGRATION_MODE_DH_DEBPUTY_RRR
        ),
        register_value=False,
    )

    api.pluggable_manifest_rule(
        OPARSER_PACKAGES,
        MK_SERVICES,
        list[ServiceRuleParsedFormat],
        _process_service_rules,
        source_format=list[ServiceRuleSourceFormat],
        expected_debputy_integration_mode=not_integrations(
            INTEGRATION_MODE_DH_DEBPUTY_RRR
        ),
        register_value=False,
    )

    api.pluggable_manifest_rule(
        OPARSER_PACKAGES,
        "clean-after-removal",
        ListParsedFormat,
        _parse_clean_after_removal,
        # FIXME: debputy won't see the attributes for this one :'(
        # (update `debputy_docs.yaml` when fixed)
        source_format=list[Any],
        expected_debputy_integration_mode=not_integrations(
            INTEGRATION_MODE_DH_DEBPUTY_RRR
        ),
        register_value=False,
    )

    api.pluggable_manifest_rule(
        OPARSER_PACKAGES,
        MK_INSTALLATION_SEARCH_DIRS,
        InstallationSearchDirsParsedFormat,
        _parse_installation_search_dirs,
        source_format=list[FileSystemExactMatchRule],
        expected_debputy_integration_mode=not_integrations(
            INTEGRATION_MODE_DH_DEBPUTY_RRR
        ),
        register_value=False,
    )

    api.pluggable_manifest_rule(
        rule_type=OPARSER_PACKAGES,
        rule_name="built-using",
        parsed_format=list[BuiltUsingParsedFormat],
        handler=_parse_built_using,
        # Use the shared constant like every other registration in this function
        # rather than repeating the integration mode name as a string literal.
        expected_debputy_integration_mode=not_integrations(
            INTEGRATION_MODE_DH_DEBPUTY_RRR
        ),
        inline_reference_documentation=reference_documentation(
            title="Built-Using dependency relations (`$RULE_NAME`)",
            description=textwrap.dedent(
                """\
                Generate a `Built-Using` dependency relation on the
                build dependencies selected by the `sources-for`, which
                may contain a `*` wildcard matching any number of
                arbitrary characters.

                The `built-using` should be used for static linking
                where license of dependency libraries require the
                exact source to be retained. Usually these libraries
                will be under the license terms like GNU GPL.

                packages:
                  PKG:
                    $RULE_NAME:
                    - sources-for: foo-*-source   # foo-3.1.0-source
                    - sources-for: librust-*-dev  # several matches
                    - sources-for: foo
                      when:                       # foo is always installed
                        arch-matches: amd64       # but only used on amd64

                Any of these conditions prevents the generation:
                * PKG is not part of the current build because of its
                  `Architecture` or `Build-Profiles` fields.
                * The match in `Build-Depends` carries an
                  architecture or build profile restriction that does
                  not match the current run.
                * The match in `Build-Depends` is not installed.
                  This should only happen inside alternatives, see below.
                * The manifest item carries a `when:` condition that
                  evaluates to false. This may be useful when the match
                  must be installed for unrelated reasons.

                Matches are searched in the `Build-Depends` field of
                the source package, and either `Build-Depends-Indep`
                or `Build-Depends-Arch` depending on PKG.

                In alternatives like `a | b`, each option may match
                separately.  This is a compromise between
                reproducibility on automatic builders (where the set
                of installed package is constant), and least surprise
                during local builds (where `b` may be installed
                alone). There seems to be no one-size fits all
                solution when both are installed.

                Architecture qualifiers and version restrictions in
                `Build-Depends` are ignored. The only allowed
                co-installations require a common source and version.
                """,
            ),
        ),
    )

    api.pluggable_manifest_rule(
        rule_type=OPARSER_PACKAGES,
        rule_name="static-built-using",
        parsed_format=list[BuiltUsingParsedFormat],
        handler=_parse_static_built_using,
        expected_debputy_integration_mode=not_integrations(
            INTEGRATION_MODE_DH_DEBPUTY_RRR
        ),
        inline_reference_documentation=reference_documentation(
            title="Static-Built-Using dependency relations (`$RULE_NAME`)",
            description=textwrap.dedent(
                """\
                Generate a `Static-Built-Using` dependency relation on the
                build dependencies selected by the `sources-for`, which
                may contain a `*` wildcard matching any number of
                arbitrary characters.

                The `static-built-using` should be used for static linking
                where license of dependency libraries do not require the
                exact source to be retained. This is usually libraries under
                permissive licenses like Apache-2.0 or MIT/X11/Expat.

                packages:
                  PKG:
                    $RULE_NAME:
                    - sources-for: foo-*-source   # foo-3.1.0-source
                    - sources-for: librust-*-dev  # several matches
                    - sources-for: foo
                      when:                       # foo is always installed
                        arch-matches: amd64       # but only used on amd64

                Any of these conditions prevents the generation:
                * PKG is not part of the current build because of its
                  `Architecture` or `Build-Profiles` fields.
                * The match in `Build-Depends` carries an
                  architecture or build profile restriction that does
                  not match the current run.
                * The match in `Build-Depends` is not installed.
                  This should only happen inside alternatives, see below.
                * The manifest item carries a `when:` condition that
                  evaluates to false. This may be useful when the match
                  must be installed for unrelated reasons.

                Matches are searched in the `Build-Depends` field of
                the source package, and either `Build-Depends-Indep`
                or `Build-Depends-Arch` depending on PKG.

                In alternatives like `a | b`, each option may match
                separately.  This is a compromise between
                reproducibility on automatic builders (where the set
                of installed package is constant), and least surprise
                during local builds (where `b` may be installed
                alone). There seems to be no one-size fits all
                solution when both are installed.

                Architecture qualifiers and version restrictions in
                `Build-Depends` are ignored. The only allowed
                co-installations require a common source and version.
                """,
            ),
        ),
    )


class ServiceRuleSourceFormat(TypedDict):
    """Manifest (source) shape of one entry in the services rule list.

    Registered as the `source_format` element type of the `MK_SERVICES` rule;
    the parsed counterpart is `ServiceRuleParsedFormat`.
    """

    service: str
    type_of_service: NotRequired[str]
    service_scope: NotRequired[Literal["system", "user"]]
    enable_on_install: NotRequired[bool]
    start_on_install: NotRequired[bool]
    on_upgrade: NotRequired[ServiceUpgradeRule]
    # Singular spelling; the parse hint targets the `service_managers`
    # attribute of the parsed format.
    service_manager: NotRequired[
        Annotated[str, DebputyParseHint.target_attribute("service_managers")]
    ]
    service_managers: NotRequired[list[str]]


class ServiceRuleParsedFormat(DebputyParsedContent):
    """Parsed shape of one services rule entry.

    Consumed by `ServiceRule.from_service_rule_parsed_format`, which supplies
    the defaults for the optional attributes.
    """

    service: str
    type_of_service: NotRequired[str]
    service_scope: NotRequired[Literal["system", "user"]]
    enable_on_install: NotRequired[bool]
    start_on_install: NotRequired[bool]
    on_upgrade: NotRequired[ServiceUpgradeRule]
    service_managers: NotRequired[list[str]]


@dataclasses.dataclass(slots=True, frozen=True)
class ServiceRule:
    """A manifest-provided override for a service definition.

    `None` in `enable_on_install`, `start_on_install` or `on_upgrade` means
    "not specified in the manifest"; the corresponding value of the service
    definition is then left untouched.  `service_managers` being `None` means
    the rule applies to every service manager.
    """

    definition_source: str
    service: str
    type_of_service: str
    service_scope: Literal["system", "user"]
    enable_on_install: bool | None
    start_on_install: bool | None
    on_upgrade: ServiceUpgradeRule | None
    service_managers: frozenset[str] | None

    @classmethod
    def from_service_rule_parsed_format(
        cls,
        data: ServiceRuleParsedFormat,
        attribute_path: AttributePath,
    ) -> "Self":
        """Create a rule from its parsed manifest representation.

        :param data: The parsed manifest entry.
        :param attribute_path: Location of the entry in the manifest; its
          `path` becomes the `definition_source` of the rule.
        :return: The rule, with `type_of_service` defaulting to `"service"`
          and `service_scope` defaulting to `"system"`.
        """
        service_managers = data.get("service_managers")
        return cls(
            attribute_path.path,
            data["service"],
            data.get("type_of_service", "service"),
            cast("Literal['system', 'user']", data.get("service_scope", "system")),
            data.get("enable_on_install"),
            data.get("start_on_install"),
            data.get("on_upgrade"),
            # Always store a frozenset when the attribute was provided (even if
            # empty) so the field matches its declared type and the frozen
            # instance stays hashable.
            frozenset(service_managers) if service_managers is not None else None,
        )

    def applies_to_service_manager(self, service_manager: str) -> bool:
        """Return whether this rule is applicable to `service_manager`.

        A rule without an explicit `service_managers` restriction applies to
        all service managers.
        """
        return self.service_managers is None or service_manager in self.service_managers

    def apply_to_service_definition(
        self,
        service_definition: ServiceDefinition[DSD],
    ) -> ServiceDefinition[DSD]:
        """Return a copy of `service_definition` with this rule applied.

        Reports an error via `_error` if the definition was already overridden
        by another manifest rule (i.e., it is no longer plugin provided).

        :param service_definition: The definition to override. Must be a
          `ServiceDefinitionImpl`.
        :return: The replaced definition, marked as not plugin provided and
          with this rule as its definition source.
        """
        assert isinstance(service_definition, ServiceDefinitionImpl)
        if not service_definition.is_plugin_provided_definition:
            _error(
                f"Conflicting definitions related to {self.service} (type: {self.type_of_service},"
                f" scope: {self.service_scope}). First definition at {service_definition.definition_source},"
                f" the second at {self.definition_source}). If they are for different service managers,"
                " you can often avoid this problem by explicitly defining which service managers are applicable"
                ' to each rule via the "service-managers" keyword.'
            )
        changes: dict[str, Any] = {
            "definition_source": self.definition_source,
            "is_plugin_provided_definition": False,
        }
        # Only switch the primary name when the rule used one of the known aliases.
        if (
            self.service != service_definition.name
            and self.service in service_definition.names
        ):
            changes["name"] = self.service
        if self.enable_on_install is not None:
            # Enabling and starting are separate attributes of the definition;
            # the enable override must not be written to the start attribute.
            changes["auto_enable_on_install"] = self.enable_on_install
        if self.start_on_install is not None:
            changes["auto_start_on_install"] = self.start_on_install
        if self.on_upgrade is not None:
            changes["on_upgrade"] = self.on_upgrade

        return service_definition.replace(**changes)


class BinaryVersionParsedFormat(DebputyParsedContent):
    """Parsed format of the `MK_BINARY_VERSION` rule (registered with a `str` source format)."""

    binary_version: str


class BuiltUsingParsedFormat(DebputyParsedContentStandardConditional):
    """Parsed format of one `built-using` item. Also used for static-built-using."""

    # Glob selecting build-dependencies; validated against
    # `_VALID_BUILT_USING_GLOB` in `_validate_built_using`.
    sources_for: str


class ListParsedFormat(DebputyParsedContent):
    """Parsed format wrapping an untyped list; used by the `clean-after-removal` rule."""

    elements: list[Any]


class ListOfTransformationRulesFormat(DebputyParsedContent):
    """Parsed format wrapping a list of transformation rules.

    NOTE(review): not referenced in the visible part of this module.
    """

    elements: list[TransformationRule]


class ListOfDpkgMaintscriptHelperCommandFormat(DebputyParsedContent):
    """Parsed format wrapping a list of dpkg maintscript helper commands.

    NOTE(review): not referenced in the visible part of this module.
    """

    elements: list[DpkgMaintscriptHelperCommand]


class InstallationSearchDirsParsedFormat(DebputyParsedContent):
    """Parsed format of the `MK_INSTALLATION_SEARCH_DIRS` rule."""

    installation_search_dirs: list[FileSystemExactMatchRule]


def _parse_binary_version(
    _name: str,
    parsed_data: BinaryVersionParsedFormat,
    _attribute_path: AttributePath,
    _parser_context: ParserContextData,
) -> str:
    """Handler for the binary version rule: return the parsed version string.

    The rule name, attribute path and parser context are not used.
    """
    binary_version = parsed_data["binary_version"]
    return binary_version


def _parse_installation_search_dirs(
    _name: str,
    parsed_data: InstallationSearchDirsParsedFormat,
    _attribute_path: AttributePath,
    _parser_context: ParserContextData,
) -> list[FileSystemExactMatchRule]:
    """Handler for the installation search dirs rule: return the parsed list.

    The rule name, attribute path and parser context are not used.
    """
    search_dirs = parsed_data["installation_search_dirs"]
    return search_dirs


def _process_service_rules(
    _name: str,
    parsed_data: list[ServiceRuleParsedFormat],
    attribute_path: AttributePath,
    _parser_context: ParserContextData,
) -> list[ServiceRule]:
    """Convert every parsed services entry into a `ServiceRule`.

    Each rule is given the attribute path of its own list index, so that its
    definition source points at the exact manifest entry.
    """
    service_rules: list[ServiceRule] = []
    for index, raw_rule in enumerate(parsed_data):
        rule_path = attribute_path[index]
        service_rules.append(
            ServiceRule.from_service_rule_parsed_format(raw_rule, rule_path)
        )
    return service_rules


def _parse_built_using(
    name: str,
    parsed_data: list[BuiltUsingParsedFormat],
    attribute_path: AttributePath,
    parser_context: ParserContextData,
) -> list[BuiltUsing]:
    """Handler for the `built-using` rule; delegates to `_built_using_handler`."""
    handler_args = (name, parsed_data, attribute_path, parser_context)
    return _built_using_handler(BuiltUsing, *handler_args)


def _parse_static_built_using(
    name: str,
    parsed_data: list[BuiltUsingParsedFormat],
    attribute_path: AttributePath,
    parser_context: ParserContextData,
) -> list[StaticBuiltUsing]:
    """Handler for the `static-built-using` rule; delegates to `_built_using_handler`."""
    handler_args = (name, parsed_data, attribute_path, parser_context)
    return _built_using_handler(StaticBuiltUsing, *handler_args)


# A `sources-for` glob may contain the characters valid in a Debian package
# name (lower case letters, digits, `.`, `+`, `-`) plus the `*` wildcard.
# Package names must start with an alphanumeric character, so a leading digit
# (e.g. `7zip`, `0ad`) is accepted in addition to a letter or the wildcard.
_VALID_BUILT_USING_GLOB = re.compile(r"[a-z0-9*][a-z0-9.+*-]*")
# Translates a validated glob into regex source: `.` and `+` are matched
# literally and `*` matches any number of arbitrary characters.
_BUILT_USING_GLOB_TO_RE = str.maketrans({".": "[.]", "+": "[+]", "*": ".*"})


def _validate_built_using[T: BuiltUsingBase](
    cls: type[T],
    parsed_data: BuiltUsingParsedFormat,
    attribute_path: AttributePath,
    parser_context: ParserContextData,
) -> T:
    # Process an item in a (static-)built-using list.
    raw_glob = parsed_data["sources_for"]
    if _VALID_BUILT_USING_GLOB.fullmatch(raw_glob) is None:
        raise ManifestParseException(
            f"The glob {raw_glob!r} defined at {attribute_path["sources_for"].path} contained invalid characters."
            f" It must only characters valid in a package name plus the `*` character"
        )
    regex = re.compile(raw_glob.translate(_BUILT_USING_GLOB_TO_RE))

    pkg = parser_context.current_binary_package_state.binary_package
    if pkg.is_arch_all:
        other = "Build-Depends-Indep"
    else:
        other = "Build-Depends-Arch"
    # pylint: disable=too-many-nested-blocks
    matched_packages = []
    for bd_field in ("Build-Depends", other):
        raw = parser_context.source_package.fields.get(bd_field)
        if raw is not None:
            for options in PkgRelation.parse_relations(raw):
                options: list[PkgRelation.ParsedRelation]
                for idx, relation in enumerate(options):
                    name = relation["name"]
                    if regex.fullmatch(name):
                        matched_packages.append(
                            MatchedBuiltUsingRelation(name, not idx, relation)
                        )
    if not matched_packages:
        raise ManifestParseException(
            f"The glob {raw_glob!r} defined at {attribute_path["sources_for"].path} does match at least one clause of Build-Depends or {other}."
            " Either a Build-dependency is missing or the glob fails to match the intended build-dependency, or the glob superfluous and can be removed."
        )
    return cls(
        raw_glob,
        matched_packages,
        parsed_data.get("when"),
        attribute_path,
    )


def _built_using_handler[T: BuiltUsingBase](
    cls: type[T],
    _name: str,
    parsed_data: list[BuiltUsingParsedFormat],
    attribute_path: AttributePath,
    parser_context: ParserContextData,
) -> list[T]:
    return [
        _validate_built_using(
            cls,
            pd,
            attribute_path[idx],
            parser_context,
        )
        for idx, pd in enumerate(parsed_data)
    ]


def _unpack_list(
    _name: str,
    parsed_data: list[Any],
    _attribute_path: AttributePath,
    _parser_context: ParserContextData,
) -> list[Any]:
    """Handler for list-valued rules that need no processing.

    Returns the parsed list unchanged (the same object, not a copy); the rule
    name, attribute path and parser context are not used.
    """
    return parsed_data


class CleanAfterRemovalRuleSourceFormat(TypedDict):
    """Manifest (source) mapping shape of one `clean-after-removal` rule.

    The parsed counterpart is `CleanAfterRemovalRule`.
    """

    # Singular spelling; the parse hint targets the `paths` attribute of the
    # parsed format.
    path: NotRequired[Annotated[str, DebputyParseHint.target_attribute("paths")]]
    paths: NotRequired[list[str]]
    delete_on: NotRequired[Literal["purge", "removal"]]
    recursive: NotRequired[bool]
    ignore_non_empty_dir: NotRequired[bool]


class CleanAfterRemovalRule(DebputyParsedContent):
    """Parsed shape of one `clean-after-removal` rule.

    `_parse_clean_after_removal` treats a missing `delete_on` as `"purge"` and
    missing `recursive` / `ignore_non_empty_dir` as `False`.
    """

    paths: list[str]
    delete_on: NotRequired[Literal["purge", "removal"]]
    recursive: NotRequired[bool]
    ignore_non_empty_dir: NotRequired[bool]


# Parser for a single `clean-after-removal` entry. An entry may be written as
# a mapping (`CleanAfterRemovalRuleSourceFormat`), a string or a list of strings.
#
# FIXME: It is not optimal that we initialize a ParserGenerator here. However, the rule does not depend on any
#  complex types that are registered by plugins, so it will work for now.
_CLEAN_AFTER_REMOVAL_RULE_PARSER = ParserGenerator().generate_parser(
    CleanAfterRemovalRule,
    source_content=Union[CleanAfterRemovalRuleSourceFormat, str, list[str]],
    inline_reference_documentation=reference_documentation(
        reference_documentation_url=manifest_format_doc(
            "remove-runtime-created-paths-on-purge-or-post-removal-clean-after-removal"
        ),
    ),
)


# Order between clean_on_removal and conffile_management is
# important. We want the dpkg conffile management rules to happen before the
# clean_on_removal rules.  Since the latter only affects `postrm`
# and the order is reversed for `postrm` scripts (among others), we need to do
# clean_on_removal first to account for the reversing of order.
#
# FIXME: All of this is currently not really possible to do, but it should be.
# (I think it is the correct order by "mistake" rather than by "design", which is
# what this note is about)
def _parse_clean_after_removal(
    _name: str,
    parsed_data: ListParsedFormat,
    attribute_path: AttributePath,
    parser_context: ParserContextData,
) -> None:  # TODO: Return and pass to a maintscript helper
    """Turn `clean-after-removal` rules into `postrm` maintscript snippets.

    Every element is parsed with `_CLEAN_AFTER_REMOVAL_RULE_PARSER`, each of
    its patterns is checked against a set of safety rules, and one
    `MaintscriptSnippet` per element is appended to the `postrm` snippets of
    the current binary package.

    :param _name: The rule name (unused).
    :param parsed_data: The raw rule list (in its `elements` attribute).
    :param attribute_path: Location of the rule list in the manifest.
    :param parser_context: Provides the current binary package state and the
      substitution used for the patterns.
    :raises ManifestParseException: If `recursive` and `ignore-non-empty-dir`
      are combined, if a pattern does not end with "/" while
      `ignore-non-empty-dir` is set, if a pattern matches anything, is not
      anchored in a specific directory, is not inside one of the known
      acceptable directories, or cannot be shell escaped.
    """
    raw_clean_after_removal = parsed_data["elements"]
    package_state = parser_context.current_binary_package_state

    for no, raw_transformation in enumerate(raw_clean_after_removal):
        definition_source = attribute_path[no]
        clean_after_removal_rules = _CLEAN_AFTER_REMOVAL_RULE_PARSER.parse_input(
            raw_transformation,
            definition_source,
            parser_context=parser_context,
        )
        patterns = clean_after_removal_rules["paths"]
        if patterns:
            definition_source.path_hint = patterns[0]
        # Defaults: delete on purge, non-recursive, fail on non-empty directories.
        delete_on = clean_after_removal_rules.get("delete_on") or "purge"
        recurse = clean_after_removal_rules.get("recursive") or False
        ignore_non_empty_dir = (
            clean_after_removal_rules.get("ignore_non_empty_dir") or False
        )
        # Shell test on the first argument passed to the maintscript.
        if delete_on == "purge":
            condition = '[ "$1" = "purge" ]'
        else:
            condition = '[ "$1" = "remove" ]'

        if ignore_non_empty_dir:
            if recurse:
                raise ManifestParseException(
                    'The "recursive" and "ignore-non-empty-dir" options are mutually exclusive.'
                    f" Both were enabled at the same time in at {definition_source.path}"
                )
            for pattern in patterns:
                if not pattern.endswith("/"):
                    raise ManifestParseException(
                        'When ignore-non-empty-dir is True, then all patterns must end with a literal "/"'
                        f' to ensure they only apply to directories. The pattern "{pattern}" at'
                        f" {definition_source.path} did not."
                    )

        substitution = parser_context.substitution
        match_rules = [
            MatchRule.from_path_or_glob(
                p, definition_source.path, substitution=substitution
            )
            for p in patterns
        ]
        content_lines = [
            f"if {condition}; then\n",
        ]
        # `match_rules` is index-aligned with `patterns`.
        for idx, match_rule in enumerate(match_rules):
            original_pattern = patterns[idx]
            if match_rule is MATCH_ANYTHING:
                raise ManifestParseException(
                    f'Using "{original_pattern}" in a clean rule would trash the system.'
                    f" Please restrict this pattern at {definition_source.path} considerably."
                )
            is_subdir_match = False
            matched_directory: str | None
            if isinstance(match_rule, ExactFileSystemPath):
                # An exact path is validated via its parent directory and is
                # allowed to sit directly inside an acceptable directory.
                matched_directory = (
                    os.path.dirname(match_rule.path)
                    if match_rule.path not in ("/", ".", "./")
                    else match_rule.path
                )
                is_subdir_match = True
            else:
                # Other match rules are only usable if they expose the
                # directory they are anchored in.
                matched_directory = getattr(match_rule, "directory", None)

            if matched_directory is None:
                raise ManifestParseException(
                    f'The pattern "{original_pattern}" defined at {definition_source.path} is not'
                    f" trivially anchored in a specific directory. Cowardly refusing to use it"
                    f" in a clean rule as it may trash the system if the pattern is overreaching."
                    f" Please avoid glob characters in the top level directories."
                )
            assert matched_directory.startswith("./") or matched_directory in (
                ".",
                "./",
                "",
            )
            acceptable_directory = False
            would_have_allowed_direct_match = False
            # Walk from the anchoring directory towards the root until a known
            # acceptable directory is found (or the root is reached).
            while matched_directory not in (".", "./", ""):
                # Our acceptable paths set includes "/var/lib" or "/etc".  We require that the
                # pattern is either an exact match, in which case it may match directly inside
                # the acceptable directory OR it is a pattern against a subdirectory of the
                # acceptable path. As an example:
                #
                # /etc/inputrc <-- OK, exact match
                # /etc/foo/*   <-- OK, subdir match
                # /etc/*       <-- ERROR, glob directly in the accepted directory.
                if is_subdir_match and (
                    matched_directory
                    in ACCEPTABLE_CLEAN_ON_REMOVAL_IF_EXACT_MATCH_OR_SUBDIR_OF
                ):
                    acceptable_directory = True
                    break
                if (
                    matched_directory
                    in ACCEPTABLE_CLEAN_ON_REMOVAL_FOR_GLOBS_AND_EXACT_MATCHES
                ):
                    # Special-case: In some directories (such as /var/log), we allow globs directly.
                    # Notably, X11's log files are /var/log/Xorg.*.log
                    acceptable_directory = True
                    break
                if (
                    matched_directory
                    in ACCEPTABLE_CLEAN_ON_REMOVAL_IF_EXACT_MATCH_OR_SUBDIR_OF
                ):
                    # Only reachable for a glob directly inside the acceptable
                    # directory; remembered to produce a more precise error.
                    would_have_allowed_direct_match = True
                    break
                matched_directory = os.path.dirname(matched_directory)
                is_subdir_match = True

            if would_have_allowed_direct_match and not acceptable_directory:
                raise ManifestParseException(
                    f'The pattern "{original_pattern}" defined at {definition_source.path} seems to'
                    " be overreaching. If it had been a path (and not use a glob), the rule would"
                    " have been permitted."
                )
            elif not acceptable_directory:
                raise ManifestParseException(
                    f'The pattern or path "{original_pattern}" defined at {definition_source.path} seems to'
                    f' be overreaching or not limited to the set of "known acceptable" directories.'
                )

            try:
                shell_escaped_pattern = match_rule.shell_escape_pattern()
            except TypeError:
                raise ManifestParseException(
                    f'Sorry, the pattern "{original_pattern}" defined at {definition_source.path}'
                    f" is unfortunately not supported by `debputy` for clean-after-removal rules."
                    f" If you can rewrite the rule to something like `/var/log/foo/*.log` or"
                    f' similar "trivial" patterns. You may have to rewrite the pattern the rule '
                    f" into multiple patterns to achieve this.  This restriction is to enable "
                    f' `debputy` to ensure the pattern is correctly executed plus catch "obvious'
                    f' system trashing" patterns. Apologies for the inconvenience.'
                )

            # Pick the removal command; every path is prefixed with "${DPKG_ROOT}".
            if ignore_non_empty_dir:
                cmd = f'    rmdir --ignore-fail-on-non-empty "${{DPKG_ROOT}}"{shell_escaped_pattern}\n'
            elif recurse:
                cmd = f'    rm -fr "${{DPKG_ROOT}}"{shell_escaped_pattern}\n'
            elif original_pattern.endswith("/"):
                cmd = f'    rmdir "${{DPKG_ROOT}}"{shell_escaped_pattern}\n'
            else:
                cmd = f'    rm -f "${{DPKG_ROOT}}"{shell_escaped_pattern}\n'
            content_lines.append(cmd)
        content_lines.append("fi\n")

        snippet = MaintscriptSnippet(definition_source.path, "".join(content_lines))
        package_state.maintscript_snippets["postrm"].append(snippet)
