# Source code for jmbuilder.core

"""JMBuilder's core module.

Copyright (c) 2023-2024 Ryuu Mitsuki.
"""

import os as _os
import sys as _sys
import re as _re
from datetime import datetime as _dt, timezone as _tz
from typing import Dict, List, Optional, Union, TextIO
from warnings import warn as __warn
import bs4 as _bs4

from . import utils as _jmutils
from . import exception as _jmexc

# Import the package metadata. The relative import is tried first; if it
# fails, the module search path is extended and the import is retried in
# its absolute form.
try:
    from ._globals import AUTHOR, VERSION, VERSION_INFO
except (ImportError, ModuleNotFoundError, ValueError):
    # NOTE(review): presumably this branch is reached when the file is
    # executed directly and therefore has no parent package -- confirm.
    from pathlib import Path

    # Add a new Python search path to the first index: the parent
    # directory of the current first entry of ``sys.path``
    _sys.path.insert(0, str(Path(_sys.path[0]).parent))
    del Path  # Only needed for the path computation above

    from jmbuilder._globals import AUTHOR, VERSION, VERSION_INFO

# Shared exception instance used throughout this module as the explicit
# cause of raised errors (``raise ... from CORE_ERR``).
CORE_ERR: _jmexc.JMException = _jmexc.JMException(
    f'{_os.linesep}  CORE ERROR: An error occurred in core module.'
)

# Names exported by ``from jmbuilder.core import *``
__all__ = [
    'PomParser',
    'JMRepairer',
]

class PomParser:
    """
    A class that provides an easy way to parse and retrieve useful
    information from the provided POM file.

    Parameters
    ----------
    soup : BeautifulSoup
        A `bs4.BeautifulSoup` object representing the parsed POM file.

    Attributes
    ----------
    soup : BeautifulSoup
        The object that was passed to the constructor.

    project_tag : Tag or None
        The first ``<project>`` element found in `soup`, or ``None`` when
        the document does not contain one.

    """

    def __init__(self, soup: '_bs4.BeautifulSoup') -> None:
        """
        Create a new instance of ``PomParser`` class.

        Raises
        ------
        TypeError
            If `soup` is not a ``bs4.BeautifulSoup`` instance.

        """
        if not isinstance(soup, _bs4.BeautifulSoup):
            # Raise an error
            raise TypeError(f'Invalid instance class: {soup.__class__}') \
                from CORE_ERR

        self.soup: _bs4.BeautifulSoup = soup
        # ``find`` returns None when there is no <project> element; the
        # getters below handle that case instead of failing.
        self.project_tag: Optional[_bs4.element.Tag] = soup.find('project')

    @staticmethod
    def parse(pom_file: str, encoding: str = 'UTF-8') -> 'PomParser':
        """
        Parse the POM file (``pom.xml``) and return an instance of
        this class. XML comments are removed from the parsed tree.

        Parameters
        ----------
        pom_file : str
            The path of the pom.xml file to be parsed.

        encoding : str, optional
            The encoding used while parsing the pom.xml file. Defaults to UTF-8.

        Returns
        -------
        PomParser :
            An instance of this class.

        Raises
        ------
        Exception
            Any error raised while reading or parsing the file is re-raised
            with ``CORE_ERR`` set as its cause.

        """

        try:
            # Read and convert the pom.xml file to BeautifulSoup object
            soup: _bs4.BeautifulSoup = _bs4.BeautifulSoup(
                ''.join(_jmutils.readfile(pom_file, encoding=encoding)), 'xml')

            # ``find_all`` returns a list, so extracting nodes while looping
            # over it is safe. ``string=`` is the current name of the filter
            # formerly called ``text=``.
            comments = soup.find_all(
                string=lambda node: isinstance(node, _bs4.Comment))
            for comment in comments:
                comment.extract()
        except Exception as exc:
            raise exc from CORE_ERR

        # Return the instance of this class
        return PomParser(soup)

    def printsoup(self, *, pretty: bool = True, file: TextIO = _sys.stdout) -> None:
        """
        Print the ``BeautifulSoup`` object, optionally prettified, for debugging purposes.

        Parameters
        ----------
        pretty : bool, optional
            If True, the BeautifulSoup object will be prettified for better readability.
            Defaults to True.

        file : TextIO, optional
            A file-like object to which the output will be printed.
            Defaults to ``sys.stdout``.

        Notes
        -----
        This method is intended for debugging and allows you to print the
        current state of the ``BeautifulSoup`` object. The output can be customized
        with the `pretty` parameter to control prettification and the `file`
        parameter to redirect the output to a specific file-like object.

        Example
        -------
        # Print the soup to the console standard output
        >>> soup_instance.printsoup()

        # Print and save the soup to the specified file
        >>> with open('output.xml', 'w') as f:
        ...     soup_instance.printsoup(pretty=True, file=f)

        """
        print(self.soup.prettify() if pretty else str(self.soup).strip(), file=file)

    def get(self, key: Union[str, List[str]]) -> Optional['_bs4.element.Tag']:
        """
        Find the element tag based on the provided key, which can be a string
        (separated by dots) or a list of tag names. The result can be ``None``,
        which means the element is undefined or the given element tree path
        is wrong.

        Parameters
        ----------
        key : str or a list of str
            The key representing the element tree path.

        Returns
        -------
        Tag or None :
            A ``bs4.element.Tag`` object representing the desired element tag,
            or ``None`` if the element tag is undefined, cannot be found,
            or `key` is empty.

        """

        # An empty path cannot address any element
        if not key:
            return None

        # Split the key if the key is a string
        keys: List[str] = key.split('.') if isinstance(key, str) else list(key)

        # Find the element according to the first key, then walk down
        result: Optional[_bs4.element.Tag] = self.soup.find(keys[0])
        for name in keys[1:]:
            # Stop as soon as one step of the path is missing
            if result is None:
                break
            result = result.find(name)

        return result

    @staticmethod
    def _text_of(element: Optional['_bs4.element.Tag']) -> Optional[str]:
        """Return the text of `element`, or ``None`` if `element` is ``None``."""
        return None if element is None else element.text

    def _project_text(self, tag_name: str) -> Optional[str]:
        """
        Return the text of the `tag_name` element of the project, or ``None``.

        A direct child of ``<project>`` is preferred, so that an element of
        the same name nested in an earlier block (such as ``<parent>``) does
        not shadow the project's own value. If there is no direct child, the
        first matching descendant is used, as before.
        """
        project = self.project_tag
        if project is None:
            return None

        element = project.find(tag_name, recursive=False)
        if element is None:
            element = project.find(tag_name)
        return self._text_of(element)

    def get_name(self) -> Optional[str]:
        """Return the project name, or ``None`` if undefined."""
        # => project.name
        return self._project_text('name')

    def get_version(self) -> Optional[str]:
        """Return the project version, or ``None`` if undefined."""
        # => project.version
        return self._project_text('version')

    def get_id(self) -> Dict[str, Optional[str]]:
        """Return a dictionary with 'groupId' and 'artifactId'."""
        return {
            'groupId': self._project_text('groupId'),        # => project.groupId
            'artifactId': self._project_text('artifactId'),  # => project.artifactId
        }

    def get_url(self) -> Optional[str]:
        """Return the project URL, or ``None`` if undefined."""
        # => project.url
        return self._project_text('url')

    def get_inception_year(self) -> Optional[str]:
        """Return the project inception year, or ``None`` if undefined."""
        # => project.inceptionYear
        return self._project_text('inceptionYear')

    def get_author(self) -> Dict[str, Optional[str]]:
        """Return a dictionary with 'id', 'name', and 'url' of the project author."""
        # Only the first <developer> entry is consulted
        key: str = 'project.developers.developer'
        return {
            'id': self._text_of(self.get(key + '.id')),
            'name': self._text_of(self.get(key + '.name')),
            'url': self._text_of(self.get(key + '.url')),
        }

    def get_license(self) -> Dict[str, Optional[str]]:
        """Return a dictionary with 'name', 'url', and 'distribution' of the project license."""
        # Only the first <license> entry is consulted
        key: str = 'project.licenses.license'
        return {
            'name': self._text_of(self.get(key + '.name')),
            'url': self._text_of(self.get(key + '.url')),
            'distribution': self._text_of(self.get(key + '.distribution')),
        }

    def get_property(self, key: str, dot: bool = True) -> Optional[str]:
        """
        Return the value of the specified property key from the POM properties.

        Parameters
        ----------
        key : str
            The property key. A leading ``properties.`` is optional.

        dot : bool, optional
            If True, split the key using dots. Defaults to True.

        Returns
        -------
        str or None :
            The property value if found, otherwise, returns None.

        Raises
        ------
        ValueError :
            If the provided key is an empty string or None.

        """
        # Raise an error if the provided key is an empty string or None
        if not key:
            raise ValueError('Key argument cannot be empty.')

        # Remove only the leading 'properties.'; later occurrences are part
        # of the property name itself
        prefix: str = 'properties.'
        if key.startswith(prefix):
            key = key[len(prefix):]

        # Only split the dots if 'dot' argument enabled
        keys: List[str] = key.split('.') if dot else [key]
        # Add the prefix of 'properties' element tag
        if not dot or keys[0] != 'properties':
            keys.insert(0, 'properties')

        return self._text_of(self.get(keys))


class JMRepairer:
    """
    A class for repairing manifest and properties files using information
    from a POM file.

    Parameters
    ----------
    pom : str, PomParser, or _bs4.BeautifulSoup
        The POM file, either as a path (str), a `PomParser` instance,
        or a `BeautifulSoup` object.

    Raises
    ------
    ValueError
        If the 'pom' argument is empty.

    TypeError
        If the type of 'pom' argument is unknown, neither of str,
        a `PomParser` instance, nor a `BeautifulSoup` object.

    Attributes
    ----------
    _val_pattern : re.Pattern
        Regular expression pattern for extracting the name inside a
        ``${...}`` placeholder. The name may contain word characters,
        dots, hyphens and square brackets.

    _soup : PomParser
        Instance of `PomParser` representing the parsed POM file.

    _pom_items : Dict[str, str (could possibly None)]
        Dictionary containing key-value pairs extracted from the POM file.

    """

    # The hyphen is placed last in the character class so that it is a
    # literal hyphen rather than the middle of a character range.
    _val_pattern: '_re.Pattern' = _re.compile(r'\$\{([\w.\[\]-]+)\}')

    def __init__(self, pom: Union[str, 'PomParser', '_bs4.BeautifulSoup']) -> None:
        """Create a new instance of this class."""
        if not pom:
            raise ValueError("Argument 'pom' cannot be empty") \
                from CORE_ERR

        if not isinstance(pom, (str, PomParser, _bs4.BeautifulSoup)):
            raise TypeError(f"Unknown type of 'pom' argument: {type(pom).__name__}") \
                from CORE_ERR

        self._soup: PomParser = None

        if isinstance(pom, str):
            self._soup = PomParser.parse(pom)  # Need to be parsed first
        elif isinstance(pom, _bs4.BeautifulSoup):
            self._soup = PomParser(pom)        # Pass directly to the constructor
        elif isinstance(pom, PomParser):
            self._soup = pom                   # Already an instance of PomParser

        project_id: Dict[str, Optional[str]] = self._soup.get_id()
        project_author: Dict[str, Optional[str]] = self._soup.get_author()
        project_license: Dict[str, Optional[str]] = self._soup.get_license()

        # Placeholder name -> replacement value (None when the POM does
        # not define the element)
        self._pom_items: Dict[str, Optional[str]] = {
            'project.name': self._soup.get_name(),
            'project.version': self._soup.get_version(),
            'project.url': self._soup.get_url(),
            'project.groupId': project_id['groupId'],
            'project.artifactId': project_id['artifactId'],
            'project.inceptionYear': self._soup.get_inception_year(),
            'project.developers[0].name': project_author['name'],
            'project.developers[0].url': project_author['url'],
            'project.licenses[0].name': project_license['name'],
            'project.licenses[0].url': project_license['url'],
            'package.licenseFile': self._soup.get_property('package.licenseFile', dot=False),
            'package.mainClass': self._soup.get_property('package.mainClass', dot=False),
            'maven.build.timestamp': _dt.now(_tz.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
        }

    @classmethod
    def __write_out(cls, contents: List[str], out: str) -> None:
        """
        Write the given contents to the specified output file.

        Missing parent directories of `out` are created first. Each item
        of `contents` is written as one line.

        Parameters
        ----------
        contents : a list of str
            List of strings to be written to the file.

        out : str
            Path to the output file.

        Raises
        ------
        Exception
            If an error occurs while writing to the output file. The error
            is re-raised with ``CORE_ERR`` set as its cause.

        """

        try:
            # ``dirname`` is '' for a bare file name, in which case there
            # is nothing to create
            parentdir: str = _os.path.dirname(out)
            if parentdir:
                _os.makedirs(parentdir, exist_ok=True)

            # The file is opened in text mode, which already translates
            # '\n' to the platform line separator; writing ``os.linesep``
            # here would produce '\r\r\n' on Windows.
            with open(out, 'w', encoding='UTF-8') as o_file:
                o_file.writelines(f'{line}\n' for line in contents)
        except Exception as exc:
            raise exc from CORE_ERR

    @staticmethod
    def _require_infile(infile: str) -> None:
        """Raise if `infile` is empty or does not refer to an existing path."""
        if not infile:
            raise ValueError("Argument 'infile' cannot be empty") \
                from CORE_ERR

        if not _os.path.exists(infile):
            raise FileNotFoundError(f'Cannot read non-existing file: {infile!r}') \
                from CORE_ERR

    def _fill_placeholders(self, entries, id_key: Optional[str] = None) -> None:
        """
        Replace, in place, the values of `entries` that start with a
        ``${name}`` placeholder by the matching POM value.

        Parameters
        ----------
        entries : mapping of str to str
            The key-value pairs to be fixed.

        id_key : str, optional
            Key whose placeholder value is replaced by
            ``<groupId>:<artifactId>`` whatever the placeholder name is.

        Notes
        -----
        The placeholder has to be at the very start of the value and the
        whole value is replaced. Entries are left untouched when the name
        is unknown or when the POM does not define the value, so that the
        text ``None`` is never written out.
        """
        # Iterate over a snapshot because entries are reassigned in the loop
        for key, val in list(entries.items()):
            matched = self._val_pattern.match(val)
            if not matched:
                continue

            if id_key is not None and key == id_key:
                group = self._pom_items.get('project.groupId')
                artifact = self._pom_items.get('project.artifactId')
                if group is None or artifact is None:
                    continue
                replacement: Optional[str] = f'{group}:{artifact}'
            else:
                replacement = self._pom_items.get(matched[1])

            if replacement is not None:
                entries[key] = replacement

    def fix_manifest(self, infile: str, outfile: Optional[str] = None) -> None:
        """
        Fix the given manifest file by replacing placeholders with values
        from the POM file.

        The value of the ``ID`` entry, when it is a placeholder, becomes
        ``<groupId>:<artifactId>``. Entries are written as ``key: value``
        lines followed by one empty line.

        Parameters
        ----------
        infile : str
            Path to the input manifest file.

        outfile : str, optional
            Path to the output manifest file. If not specified,
            the input file will be overwritten.

        Raises
        ------
        ValueError
            If the 'infile' argument is empty.

        FileNotFoundError
            If the specified input file does not exist.

        """

        self._require_infile(infile)

        # When outfile argument not specified, then use infile
        # for the name of output file, which means will overwrite the infile
        outfile = outfile if outfile else infile

        manifest: _jmutils.JMProperties = _jmutils.JMProperties(infile)
        self._fill_placeholders(manifest, id_key='ID')

        self.__write_out(
            [f'{key}: {val}' for key, val in manifest.items()] + [''],
            out=outfile
        )

    def fix_properties(self, infile: str, outfile: Optional[str] = None) -> None:
        """
        Fix the given properties file by replacing placeholders with values
        from the POM file.

        Entries are written as ``key = value`` lines.

        Parameters
        ----------
        infile : str
            Path to the input properties file.

        outfile : str, optional
            Path to the output properties file. If not specified,
            the input file will be overwritten.

        Raises
        ------
        ValueError
            If the 'infile' argument is empty.

        FileNotFoundError
            If the specified input file does not exist.

        """

        self._require_infile(infile)

        # If the outfile argument were not specified, then use infile
        # for the name of output file, which means will overwrite the infile
        outfile = outfile if outfile else infile

        # Parse the properties file
        properties: _jmutils.JMProperties = _jmutils.JMProperties(infile)
        self._fill_placeholders(properties)

        self.__write_out(
            [f'{key} = {val}' for key, val in properties.items()],
            out=outfile
        )


# Publish the package metadata under the conventional dunder names
__author__, __version__, __version_info__ = AUTHOR, VERSION, VERSION_INFO

# Remove the names that were only needed while the module was being defined
del AUTHOR, VERSION, VERSION_INFO
del Dict, List, Union, Optional, TextIO

# This module is meant to be imported; warn and quit when it is run directly
if __name__ == '__main__':
    __warn(
        'You are attempting to run this module directly (i.e. as main module), '
        'which is not permitted. It is designed to be imported, not as main module.'
    )
    _sys.exit()