# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2022 Florian Snow <florian@familysnow.net>
# SPDX-FileCopyrightText: 2022 Pietro Albini <pietro.albini@ferrous-systems.com>
# SPDX-FileCopyrightText: 2023 DB Systel GmbH
# SPDX-FileCopyrightText: 2023 Carmen Bianca BAKKER <carmenbianca@fsfe.org>
# SPDX-FileCopyrightText: 2024 Kerry McAdams <github@klmcadams>
# SPDX-FileCopyrightText: 2024 Sebastien Morais <github@SMoraisAnsys>
# SPDX-FileCopyrightText: 2025 Simon Barth <simon.barth@gmx.de>
#
# SPDX-License-Identifier: GPL-3.0-or-later

"""Module that contains reports about files and projects for linting."""

import bdb
import contextlib
import datetime
import logging
import random
from collections import defaultdict
from collections.abc import Collection, Generator
from concurrent.futures import ProcessPoolExecutor
from functools import cached_property
from hashlib import md5
from io import StringIO
from os import cpu_count
from pathlib import Path, PurePath
from typing import Any, Final, NamedTuple, Optional, Protocol, cast
from uuid import uuid4

from . import __REUSE_version__, __version__
from ._util import (
    _add_plus_to_identifier,
    _checksum,
    _strip_plus_from_identifier,
)
from .copyright import SpdxExpression
from .extract import _LICENSEREF_PATTERN
from .global_licensing import ReuseDep5
from .i18n import _
from .project import Project, ReuseInfo
from .types import StrPath

_LOGGER = logging.getLogger(__name__)

# Value reported under the "lint_version" key by
# :meth:`ProjectReport.to_dict_lint`.
LINT_VERSION = "1.0"

# Number of CPUs as reported by :func:`os.cpu_count`, or 1 if that returns
# nothing usable. Used for the default of the *multiprocessing* parameters and
# for sizing the work chunks handed to the process pool.
_CPU_COUNT: Final[int] = cpu_count() or 1
#: This variable exists to be able to override parallelisation. If set to
#: :const:`False`, file reports are generated without parallelisation, even
#: when multiprocessing was requested by the caller.
ENABLE_PARALLEL = True

# REUSE-IgnoreStart


class _MultiprocessingContainer:
    """Callable that remembers the data needed to generate a
    :class:`FileReport`, so that it can be mapped over a collection of files.

    Calling an instance with a path returns a :class:`_MultiprocessingResult`
    that holds either the generated report or the exception that was raised
    while generating it.
    """

    def __init__(
        self, project: Project, do_checksum: bool, add_license_concluded: bool
    ):
        """Store the project and the options for report generation.

        Args:
            project: The project whose files are reported on.
            do_checksum: Passed on to :meth:`FileReport.generate`.
            add_license_concluded: Passed on to :meth:`FileReport.generate`.
        """
        if isinstance(project.global_licensing, ReuseDep5):
            # Remember that a dep5_copyright was (or was not) set prior.
            self.has_dep5 = bool(project.global_licensing)
            # TODO: We create a copy of the project in the following
            # song-and-dance because the debian Copyright object cannot be
            # pickled.
            new_project = Project(
                project.root,
                vcs_strategy=project.vcs_strategy,
                license_map=project.license_map,
                licenses=project.licenses.copy(),
                # TODO: adjust this method/class to account for REUSE.toml as
                # well. Unset dep5_copyright
                global_licensing=None,
                include_submodules=project.include_submodules,
                include_meson_subprojects=project.include_meson_subprojects,
            )
            new_project.licenses_without_extension = (
                project.licenses_without_extension
            )
            self.project = new_project
        else:
            self.has_dep5 = False
            self.project = project

        self.reuse_dep5: ReuseDep5 | None = None
        # Whether this copy of the container already tried to parse
        # .reuse/dep5. Tracked separately from the result so that a failed
        # parse is not repeated for every single file.
        self._dep5_attempted = False
        self.do_checksum = do_checksum
        self.add_license_concluded = add_license_concluded

    def __call__(self, file_: StrPath) -> "_MultiprocessingResult":
        """Generate the report for *file_*.

        Any :class:`Exception` raised during generation is not propagated,
        but returned in the ``error`` field of the result.
        """
        # By remembering that we've tried to parse the .reuse/dep5, we only
        # parse it once (the first time) per copy of this container.
        if self.has_dep5 and not self._dep5_attempted:
            self._dep5_attempted = True
            # pylint: disable=broad-except
            try:
                self.reuse_dep5 = ReuseDep5.from_file(
                    self.project.root / ".reuse/dep5"
                )
            except Exception:
                # Best effort: carry on without global licensing, but leave
                # a trace of why instead of swallowing the error silently.
                _LOGGER.debug(
                    "Could not re-parse '.reuse/dep5' for report generation",
                    exc_info=True,
                )
            else:
                self.project.global_licensing = self.reuse_dep5
        # pylint: disable=broad-except
        try:
            return _MultiprocessingResult(
                file_,
                FileReport.generate(
                    self.project,
                    file_,
                    do_checksum=self.do_checksum,
                    add_license_concluded=self.add_license_concluded,
                ),
                None,
            )
        except Exception as exc:
            return _MultiprocessingResult(file_, None, exc)


class _MultiprocessingResult(NamedTuple):
    """Result of calling :class:`_MultiprocessingContainer` on a single file.

    On success, *report* holds the generated report and *error* is
    :const:`None`. On failure, *report* is :const:`None` and *error* holds the
    exception that was raised.
    """

    # Path of the file that was processed.
    path: StrPath
    # The generated report, or None if generating it raised an exception.
    report: Optional["FileReport"]
    # The exception raised while generating the report, or None on success.
    error: Exception | None


def _generate_file_reports(
    project: Project,
    do_checksum: bool = True,
    subset_files: Collection[StrPath] | None = None,
    multiprocessing: bool = _CPU_COUNT > 1,
    add_license_concluded: bool = False,
) -> Generator[_MultiprocessingResult, None, None]:
    """Yield a :class:`_MultiprocessingResult` for every file in *project*.

    If *subset_files* is given, only the files of the project selected by it
    are processed. The work is spread over a process pool when both
    *multiprocessing* and :data:`ENABLE_PARALLEL` are true; otherwise the files
    are processed one after another in the current process.
    """
    worker = _MultiprocessingContainer(
        project, do_checksum, add_license_concluded
    )

    if subset_files is None:
        candidates = project.all_files()
    else:
        candidates = project.subset_files(subset_files)

    if not (multiprocessing and ENABLE_PARALLEL):
        yield from map(worker, candidates)
        return

    # The files must be materialised to know how many there are, which in
    # turn determines how much work is sent to a process in one go.
    unique_files = frozenset(candidates)
    files_per_chunk = max(1, int(len(unique_files) / _CPU_COUNT / 4))
    with ProcessPoolExecutor() as pool:
        yield from pool.map(worker, unique_files, chunksize=files_per_chunk)


def _process_error(error: Exception, path: StrPath) -> None:
    """Log *error*, which was raised while generating the report of *path*.

    Read and decoding errors get a dedicated message; everything else is
    logged as an unexpected error. Debugger quits and keyboard interrupts are
    re-raised instead of being logged.
    """
    # Facilitate better debugging by being able to quit the program.
    if isinstance(error, (bdb.BdbQuit, KeyboardInterrupt)):
        raise error

    if isinstance(error, (OSError, UnicodeError)):
        message = _("Could not read '{path}'").format(path=path)
    else:
        message = _(
            "Unexpected error occurred while parsing '{path}'"
        ).format(path=path)
    _LOGGER.error(message, exc_info=error)


class ProjectReportSubsetProtocol(Protocol):
    """A :class:`Protocol` that defines a subset of functionality of
    :class:`ProjectReport`, implemented by :class:`ProjectSubsetReport`.

    It covers the attributes and properties that both report classes in this
    module provide.
    """

    # Root of the project that the report was generated for.
    path: StrPath
    # Paths of the files for which generating a report raised an error.
    read_errors: set[Path]
    # Reports of the files that were processed without error.
    file_reports: set["FileReport"]

    @property
    def missing_licenses(self) -> dict[str, set[Path]]:
        """Licenses that are referred to by files, but that do not exist in
        the LICENSES/ directory, mapped to the paths of the files that refer
        to them.
        """

    @property
    def invalid_spdx_expressions(self) -> dict[Path, set[str]]:
        """Invalid SPDX expressions, keyed by the path of the file in which
        they were found.
        """

    @property
    def files_without_licenses(self) -> set[Path]:
        """Set of paths that have no licensing information."""

    @property
    def files_without_copyright(self) -> set[Path]:
        """Set of paths that have no copyright information."""

    @property
    def is_compliant(self) -> bool:
        """Whether the report subset is compliant with the REUSE Spec."""


class ProjectReport:
    """Object that holds linting report about the project.

    Instances are normally created with :meth:`generate`. The properties
    derived from the file reports are cached on first access, so they should
    only be read after the report has been fully populated.
    """

    def __init__(self, do_checksum: bool = True):
        """Create an empty report.

        Args:
            do_checksum: Whether real checksums are generated for the files
                of the report.
        """
        self.path: StrPath = ""
        self.licenses: dict[str, Path] = {}
        self.read_errors: set[Path] = set()
        self.file_reports: set[FileReport] = set()
        self.licenses_without_extension: dict[str, Path] = {}

        self.do_checksum = do_checksum

        self._license_map: dict[str, dict] = {}

    def to_dict_lint(self) -> dict[str, Any]:
        """Collects and formats data relevant to linting from report and returns
        it as a dictionary.

        The version keys come first, the remaining keys follow in alphabetical
        order, and the recommendations come last. All lists, including the
        list of files, are sorted so that the output is reproducible between
        runs.

        Returns:
            Dictionary containing data from the ProjectReport object.
        """
        non_compliant: dict[str, list[str]] = {
            "bad_licenses": sorted(self.bad_licenses),
            "deprecated_licenses": sorted(
                str(lic) for lic in self.deprecated_licenses
            ),
            "licenses_without_extension": sorted(
                self.licenses_without_extension
            ),
            "missing_licenses": sorted(self.missing_licenses),
            "unused_licenses": sorted(
                str(lic) for lic in self.unused_licenses
            ),
            "read_errors": sorted(str(path) for path in self.read_errors),
            "missing_copyright_info": sorted(
                str(path) for path in self.files_without_copyright
            ),
            "missing_licensing_info": sorted(
                str(path) for path in self.files_without_licenses
            ),
        }

        # The file reports live in a set, the iteration order of which is
        # not stable between runs. Sort them by name, like
        # bill_of_materials() does.
        files = [
            file_report.to_dict_lint()
            for file_report in sorted(
                self.file_reports, key=lambda report: report.name
            )
        ]

        number_of_files = len(self.file_reports)
        summary: dict[str, Any] = {
            "used_licenses": sorted(self.used_licenses),
            "files_total": number_of_files,
            "files_with_copyright_info": number_of_files
            - len(self.files_without_copyright),
            "files_with_licensing_info": number_of_files
            - len(self.files_without_licenses),
            "compliant": self.is_compliant,
        }

        return {
            # The top three keys.
            "lint_version": LINT_VERSION,
            "reuse_spec_version": __REUSE_version__,
            "reuse_tool_version": __version__,
            # The remaining keys, in alphabetical order.
            "files": files,
            "non_compliant": non_compliant,
            "summary": summary,
            # The recommendations go on the bottom.
            "recommendations": self.recommendations,
        }

    def bill_of_materials(
        self,
        creator_person: str | None = None,
        creator_organization: str | None = None,
    ) -> str:
        """Generate a bill of materials from the project.

        See https://spdx.org/specifications.

        Args:
            creator_person: Name of the person that creates the document. It
                is rendered with :func:`format_creator`.
            creator_organization: Name of the organization that creates the
                document. It is rendered with :func:`format_creator`.

        Returns:
            The document in SPDX tag-value notation.
        """
        out = StringIO()
        # Write mandatory tags
        out.write("SPDXVersion: SPDX-2.1\n")
        out.write("DataLicense: CC0-1.0\n")
        out.write("SPDXID: SPDXRef-DOCUMENT\n")

        out.write(f"DocumentName: {Path(self.path).resolve().name}\n")
        # TODO: Generate UUID from git revision maybe
        # TODO: Fix the URL
        out.write(
            f"DocumentNamespace: http://spdx.org/spdxdocs/spdx-v2.1-{uuid4()}\n"
        )

        # Author
        out.write(f"Creator: Person: {format_creator(creator_person)}\n")
        out.write(
            f"Creator: Organization: {format_creator(creator_organization)}\n"
        )
        out.write(f"Creator: Tool: reuse-{__version__}\n")

        now = datetime.datetime.now(tz=datetime.timezone.utc)
        out.write(f"Created: {now.strftime('%Y-%m-%dT%H:%M:%SZ')}\n")
        out.write(
            "CreatorComment: <text>This document was created automatically"
            " using available reuse information consistent with"
            " REUSE.</text>\n"
        )

        # Sorted, so that the files always appear in the same order.
        reports = sorted(self.file_reports, key=lambda x: x.name)

        for report in reports:
            out.write(
                "Relationship: SPDXRef-DOCUMENT DESCRIBES"
                f" {report.spdx_id}\n"
            )

        for report in reports:
            out.write("\n")
            out.write(f"FileName: {report.name}\n")
            out.write(f"SPDXID: {report.spdx_id}\n")
            out.write(f"FileChecksum: SHA1: {report.chk_sum}\n")
            out.write(f"LicenseConcluded: {report.license_concluded}\n")

            for lic in sorted(report.licenses_in_file):
                out.write(f"LicenseInfoInFile: {lic}\n")
            if report.copyright:
                out.write(
                    f"FileCopyrightText: <text>{report.copyright}</text>\n"
                )
            else:
                out.write("FileCopyrightText: NONE\n")

        # Licenses. Only the texts of custom (LicenseRef-) licenses are
        # included in the document.
        for lic, path in sorted(self.licenses.items()):
            if _LICENSEREF_PATTERN.match(lic):
                out.write("\n")
                out.write(f"LicenseID: {lic}\n")
                out.write("LicenseName: NOASSERTION\n")

                with (Path(self.path) / path).open(encoding="utf-8") as fp:
                    out.write(f"ExtractedText: <text>{fp.read()}</text>\n")

        return out.getvalue()

    @classmethod
    def generate(
        cls,
        project: Project,
        do_checksum: bool = True,
        multiprocessing: bool = _CPU_COUNT > 1,
        add_license_concluded: bool = False,
    ) -> "ProjectReport":
        """Generate a :class:`ProjectReport` from a :class:`Project`.

        Files for which no report could be generated are logged and recorded
        in :attr:`read_errors`.

        Args:
            project: The :class:`Project` to lint.
            do_checksum: Generate a checksum of every file. If this is
                :const:`False`, generate a random checksum for every file.
            multiprocessing: Whether to use multiprocessing.
            add_license_concluded: Whether to aggregate all found SPDX
                expressions into a concluded license.
        """
        project_report = cls(do_checksum=do_checksum)
        project_report.path = project.root
        project_report.licenses = project.licenses
        project_report._license_map = project.license_map
        project_report.licenses_without_extension = (
            project.licenses_without_extension
        )

        results = _generate_file_reports(
            project,
            do_checksum=do_checksum,
            multiprocessing=multiprocessing,
            add_license_concluded=add_license_concluded,
        )
        for result in results:
            if result.error:
                _process_error(result.error, result.path)
                project_report.read_errors.add(Path(result.path))
                continue

            # Without an error, the result always carries a report.
            file_report = cast(FileReport, result.report)
            project_report.file_reports.add(file_report)

        return project_report

    @cached_property
    def used_licenses(self) -> set[str]:
        """Set of license identifiers that are found in file reports."""
        return {
            lic
            for file_report in self.file_reports
            for lic in file_report.licenses_in_file
        }

    @cached_property
    def bad_licenses(self) -> dict[str, Path]:
        """Licenses in LICENSES/ which are not valid SPDX licenses."""
        return {
            lic: path
            for lic, path in self.licenses.items()
            if lic not in self._license_map
        }

    @cached_property
    def deprecated_licenses(self) -> set[str]:
        """Licenses whose SPDX License identifier has been deprecated."""
        return {
            lic
            for lic in self.licenses
            if lic in self._license_map
            and self._license_map[lic]["isDeprecatedLicenseId"]
        }

    @cached_property
    def unused_licenses(self) -> set[str]:
        """Set of license identifiers that are not found in any file report.

        A license also counts as used if only its variant with a plus is
        found in a file report.
        """
        return {
            lic
            for lic in self.licenses
            if not any(
                identifier in self.used_licenses
                for identifier in (lic, _add_plus_to_identifier(lic))
            )
        }

    @cached_property
    def missing_licenses(self) -> dict[str, set[Path]]:
        """Licenses that are referred to by files, but that do not exist in
        the LICENSES/ directory, mapped to the paths of the files that refer
        to them.
        """
        result = defaultdict(set)
        for file_report in self.file_reports:
            for missing_license in file_report.missing_licenses:
                result[missing_license].add(file_report.path)
        return result

    @cached_property
    def invalid_spdx_expressions(self) -> dict[Path, set[str]]:
        """Invalid expressions by file."""
        return {
            file_report.path: file_report.invalid_spdx_expressions
            for file_report in self.file_reports
            if file_report.invalid_spdx_expressions
        }

    @cached_property
    def files_without_licenses(self) -> set[Path]:
        """Set of paths that have no licensing information."""
        return {
            file_report.path
            for file_report in self.file_reports
            if not file_report.licenses_in_file
        }

    @cached_property
    def files_without_copyright(self) -> set[Path]:
        """Set of paths that have no copyright information."""
        return {
            file_report.path
            for file_report in self.file_reports
            if not file_report.copyright
        }

    @cached_property
    def is_compliant(self) -> bool:
        """Whether the report is compliant with the REUSE Spec.

        This is the case if none of the problem categories of the report
        contains any entry.
        """
        return not any(
            (
                self.missing_licenses,
                self.unused_licenses,
                self.bad_licenses,
                self.deprecated_licenses,
                self.licenses_without_extension,
                self.read_errors,
                self.invalid_spdx_expressions,
                self.files_without_copyright,
                self.files_without_licenses,
            )
        )

    @property
    def recommendations(self) -> list[str]:
        """Generate help for next steps based on found REUSE issues.

        Returns:
            One translated help text for every category of problems that was
            found, or an empty list if there are no problems.
        """
        recommendations = []

        # These items should be ordered in the same way as in the summary.
        if self.bad_licenses:
            recommendations.append(
                _(
                    "Fix bad licenses: At least one license in the LICENSES"
                    " directory and/or provided by 'SPDX-License-Identifier'"
                    " tags is invalid. They are either not valid SPDX License"
                    " Identifiers or do not start with 'LicenseRef-'. FAQ about"
                    " custom licenses:"
                    " https://reuse.software/faq/#custom-license"
                )
            )
        if self.deprecated_licenses:
            recommendations.append(
                _(
                    "Fix deprecated licenses: At least one of the licenses in"
                    " the LICENSES directory and/or provided by an"
                    " 'SPDX-License-Identifier' tag or in '.reuse/dep5' has"
                    " been deprecated by SPDX. The current list and their"
                    " respective recommended  new identifiers can be found"
                    " here: <https://spdx.org/licenses/#deprecated>"
                )
            )
        if self.licenses_without_extension:
            recommendations.append(
                _(
                    "Fix licenses without file extension: At least one license"
                    " text file in the 'LICENSES' directory does not have a"
                    " '.txt' file extension. Please rename the file(s)"
                    " accordingly."
                )
            )
        if self.missing_licenses:
            recommendations.append(
                _(
                    "Fix missing licenses: For at least one of the license"
                    " identifiers provided by the 'SPDX-License-Identifier'"
                    " tags, there is no corresponding license text file in the"
                    " 'LICENSES' directory. For SPDX license identifiers, you"
                    " can simply run 'reuse download --all' to get any missing"
                    " ones. For custom licenses (starting with 'LicenseRef-'),"
                    " you need to add these files yourself."
                )
            )
        if self.unused_licenses:
            recommendations.append(
                _(
                    "Fix unused licenses: At least one of the license text"
                    " files in 'LICENSES' is not referenced by any file, e.g."
                    " by an 'SPDX-License-Identifier' tag. Please make sure"
                    " that you either tag the accordingly licensed files"
                    " properly, or delete the unused license text if you are"
                    " sure that no file or code snippet is licensed as such."
                )
            )
        if self.read_errors:
            recommendations.append(
                _(
                    "Fix read errors: At least one of the files in your"
                    " directory cannot be read by the tool. Please check the"
                    " file permissions. You will find the affected files at the"
                    " top of the output as part of the logged error messages."
                )
            )
        if self.invalid_spdx_expressions:
            recommendations.append(
                _(
                    "Fix invalid SPDX License Expressions: In one or more files"
                    " there are SPDX License Expressions which cannot be"
                    " parsed. Check whether the value that follows"
                    " 'SPDX-License-Identifier:' is correct. If the detected"
                    " expression is not meant to be valid, put it between"
                    " 'REUSE-IgnoreStart' and 'REUSE-IgnoreEnd' comments."
                )
            )
        if self.files_without_copyright or self.files_without_licenses:
            recommendations.append(
                _(
                    "Fix missing copyright/licensing information: For one or"
                    " more files, the tool cannot find copyright and/or"
                    " licensing information. You typically do this by adding"
                    " 'SPDX-FileCopyrightText' and 'SPDX-License-Identifier'"
                    " tags to each file. The tutorial explains additional ways"
                    " to do this: <https://reuse.software/tutorial/>"
                )
            )

        return recommendations


class ProjectSubsetReport:
    """A slimmed-down counterpart of :class:`ProjectReport` that only covers
    a subset of the files of a project, and only a subset of the checks.
    """

    def __init__(self) -> None:
        self.path: StrPath = ""
        self.read_errors: set[Path] = set()
        self.file_reports: set[FileReport] = set()

    @classmethod
    def generate(
        cls,
        project: Project,
        subset_files: Collection[StrPath],
        multiprocessing: bool = _CPU_COUNT > 1,
    ) -> "ProjectSubsetReport":
        """Generate a :class:`ProjectSubsetReport` from a :class:`Project`.

        No real checksums and no concluded licenses are computed for the
        files. Files for which no report could be generated are logged and
        recorded in :attr:`read_errors`.

        Args:
            project: The :class:`Project` to lint.
            subset_files: Only lint the files in this list.
            multiprocessing: Whether to use multiprocessing.
        """
        report = cls()
        report.path = project.root
        for outcome in _generate_file_reports(
            project,
            do_checksum=False,
            subset_files=subset_files,
            multiprocessing=multiprocessing,
            add_license_concluded=False,
        ):
            if not outcome.error:
                # Without an error, the outcome always carries a report.
                report.file_reports.add(cast(FileReport, outcome.report))
                continue

            _process_error(outcome.error, outcome.path)
            report.read_errors.add(Path(outcome.path))

        return report

    @property
    def missing_licenses(self) -> dict[str, set[Path]]:
        """Licenses that are referred to by files, but that do not exist in
        the LICENSES/ directory, mapped to the paths of the files that refer
        to them.
        """
        files_by_license: defaultdict[str, set[Path]] = defaultdict(set)
        for report in self.file_reports:
            for identifier in report.missing_licenses:
                files_by_license[identifier].add(report.path)
        return files_by_license

    @property
    def invalid_spdx_expressions(self) -> dict[Path, set[str]]:
        """Invalid expressions by file."""
        expressions_by_file: dict[Path, set[str]] = {}
        for report in self.file_reports:
            if report.invalid_spdx_expressions:
                expressions_by_file[report.path] = (
                    report.invalid_spdx_expressions
                )
        return expressions_by_file

    @property
    def files_without_licenses(self) -> set[Path]:
        """Set of paths that have no licensing information."""
        unlicensed: set[Path] = set()
        for report in self.file_reports:
            if not report.licenses_in_file:
                unlicensed.add(report.path)
        return unlicensed

    @property
    def files_without_copyright(self) -> set[Path]:
        """Set of paths that have no copyright information."""
        uncopyrighted: set[Path] = set()
        for report in self.file_reports:
            if not report.copyright:
                uncopyrighted.add(report.path)
        return uncopyrighted

    @property
    def is_compliant(self) -> bool:
        """Whether the report subset is compliant with the REUSE Spec."""
        has_problems = bool(
            self.missing_licenses
            or self.files_without_copyright
            or self.files_without_licenses
            or self.read_errors
        )
        return not has_problems


class FileReport:  # pylint: disable=too-many-instance-attributes
    """Object that holds a linting report about a single file.

    Instances are normally created with :meth:`generate`.
    """

    def __init__(self, name: str, path: StrPath, do_checksum: bool = True):
        """Create an empty report.

        Args:
            name: Name under which the file appears in reports.
            path: Path of the file.
            do_checksum: Whether a real checksum of the file is computed by
                :meth:`generate`.
        """
        self.name = name
        self.path = Path(path)
        self.do_checksum = do_checksum

        self.reuse_infos: list[ReuseInfo] = []

        self.spdx_id: str | None = None
        self.chk_sum: str | None = None
        self.licenses_in_file: list[str] = []
        self.license_concluded: str = ""
        self.copyright: str = ""

        self.missing_licenses: set[str] = set()
        self.invalid_spdx_expressions: set[str] = set()

    def to_dict_lint(self) -> dict[str, Any]:
        """Turn the report into a json-like dictionary with exclusively
        information relevant for linting.

        Returns:
            A dictionary with the POSIX-style path of the file, and one entry
            for every copyright notice and every SPDX expression, each with
            the source that it was found in.
        """
        return {
            "path": PurePath(self.name).as_posix(),
            "copyrights": [
                {
                    "value": str(line),
                    "source": reuse_info.source_path,
                    "source_type": (
                        reuse_info.source_type.value
                        if reuse_info.source_type
                        else None
                    ),
                }
                for reuse_info in self.reuse_infos
                for line in reuse_info.copyright_notices
            ],
            "spdx_expressions": [
                {
                    "value": str(expression),
                    "is_valid": expression.is_valid,
                    "source": reuse_info.source_path,
                    "source_type": (
                        reuse_info.source_type.value
                        if reuse_info.source_type
                        else None
                    ),
                }
                for reuse_info in self.reuse_infos
                for expression in reuse_info.spdx_expressions
            ],
        }

    @classmethod
    def generate(
        cls,
        project: Project,
        path: StrPath,
        do_checksum: bool = True,
        add_license_concluded: bool = False,
    ) -> "FileReport":
        """Generate a FileReport from a path in a Project.

        Args:
            project: The project that the file belongs to.
            path: Path of the file to report on.
            do_checksum: Compute the checksum of the file. If this is
                :const:`False`, a random value is used instead.
            add_license_concluded: Whether to combine all valid SPDX
                expressions of the file into a concluded license. If this is
                :const:`False`, the concluded license is ``NOASSERTION``.

        Raises:
            OSError: If *path* is not a file.
        """
        # pylint: disable=too-many-branches
        path = Path(path)
        if not path.is_file():
            raise OSError(f"{path} is not a file")

        relative = project.relative_from_root(path)
        report = cls(f"./{relative}", path, do_checksum=do_checksum)

        # Checksum and ID
        if report.do_checksum:
            report.chk_sum = _checksum(path)
        else:
            # This path avoids a lot of heavy computation, which is handy for
            # scenarios where you only need a unique hash, not a consistent
            # hash.
            report.chk_sum = f"{random.getrandbits(160):040x}"
        # MD5 merely derives an identifier here. Saying so keeps this
        # working on Python builds that restrict MD5 for security purposes
        # (e.g. in FIPS mode). The resulting digest is the same.
        spdx_id = md5(usedforsecurity=False)
        spdx_id.update(report.name.encode("utf-8"))
        spdx_id.update(report.chk_sum.encode("utf-8"))
        report.spdx_id = f"SPDXRef-{spdx_id.hexdigest()}"

        reuse_infos = project.reuse_info_of(path)
        for reuse_info in reuse_infos:
            for expression in reuse_info.spdx_expressions:
                if not expression.is_valid:
                    report.invalid_spdx_expressions.add(str(expression))
                    continue
                for identifier in expression.licenses:
                    # A license expression akin to Apache-1.0+ should register
                    # correctly if LICENSES/Apache-1.0.txt exists.
                    candidates = {
                        identifier,
                        _strip_plus_from_identifier(identifier),
                    }
                    # Missing license
                    if candidates.isdisjoint(project.licenses):
                        report.missing_licenses.add(identifier)

                    # Add license to report.
                    report.licenses_in_file.append(identifier)

        if not add_license_concluded:
            report.license_concluded = "NOASSERTION"
        elif not any(reuse_info.spdx_expressions for reuse_info in reuse_infos):
            report.license_concluded = "NONE"
        elif report.invalid_spdx_expressions:
            report.license_concluded = "NOASSERTION"
        else:
            # Merge all the license expressions together and simplify the
            # result into a single concluded license expression.
            report.license_concluded = str(
                SpdxExpression.combine(
                    [
                        expression
                        for reuse_info in reuse_infos
                        for expression in reuse_info.spdx_expressions
                    ]
                ).simplify()
            )

        # Copyright text
        report.copyright = "\n".join(
            map(
                str,
                sorted(
                    line
                    for reuse_info in reuse_infos
                    for line in reuse_info.copyright_notices
                ),
            )
        )
        # Source of licensing and copyright info
        report.reuse_infos = reuse_infos
        return report

    def __hash__(self) -> int:
        # Reports are collected in sets. Once the checksum is known, the hash
        # is derived from the name and the checksum of the file.
        if self.chk_sum is not None:
            return hash(self.name + self.chk_sum)
        return super().__hash__()


def format_creator(creator: str | None) -> str:
    """Render the creator field based on the provided flag"""
    if creator is None:
        return "Anonymous ()"
    if "(" in creator and creator.endswith(")"):
        # The creator field already contains an email address
        return creator
    return creator + " ()"


# REUSE-IgnoreEnd