"""JMBuilder's core module.
Copyright (c) 2023-2024 Ryuu Mitsuki.
"""
import os as _os
import sys as _sys
import re as _re
from datetime import datetime as _dt, timezone as _tz
from typing import Dict, List, Optional, Union, TextIO
from warnings import warn as __warn
import bs4 as _bs4
from . import utils as _jmutils
from . import exception as _jmexc
try:
from ._globals import AUTHOR, VERSION, VERSION_INFO
except (ImportError, ModuleNotFoundError, ValueError):
from pathlib import Path
# Add a new Python search path to the first index
_sys.path.insert(0, str(Path(_sys.path[0]).parent))
del Path
from jmbuilder._globals import AUTHOR, VERSION, VERSION_INFO
CORE_ERR: _jmexc.JMException = _jmexc.JMException(
_os.linesep + ' CORE ERROR: An error occurred in core module.')
__all__ = ['PomParser', 'JMRepairer']
[docs]class PomParser:
"""
A class that provides an easy way to parse and retrieve useful
information from the provided POM file.
Parameters
----------
soup : BeautifulSoup
A `bs4.BeautifulSoup` object representing the parsed POM file.
"""
def __init__(self, soup: _bs4.BeautifulSoup) -> 'PomParser':
"""Create a new instance of ``PomParser`` class."""
if not isinstance(soup, _bs4.BeautifulSoup):
# Raise an error
raise TypeError(f'Invalid instance class: {soup.__class__}') \
from CORE_ERR
self.soup: _bs4.BeautifulSoup = soup
self.project_tag: _bs4.element.Tag = soup.find('project')
[docs] @staticmethod
def parse(pom_file: str, encoding: str = 'UTF-8') -> 'PomParser':
"""
Parse the POM file (``pom.xml``) and return an instance of
this class. Remove comments and blank lines to keep the POM clean.
Parameters
----------
pom_file : str
The path of the pom.xml file to be parsed.
encoding : str, optional
The encoding used while parsing the pom.xml file. Defaults to UTF-8.
Returns
-------
PomParser :
An instance of this class.
"""
try:
# Read and convert the pom.xml file to BeautifulSoup object
soup: _bs4.BeautifulSoup = _bs4.BeautifulSoup(
''.join(_jmutils.readfile(pom_file, encoding=encoding)), 'xml')
# Find the comments using lambda, then extract them
for element in soup(text=lambda t: isinstance(t, _bs4.Comment)):
element.extract()
except Exception as exc:
raise exc from CORE_ERR
# Return the instance of this class
return PomParser(soup)
[docs] def printsoup(self, *, pretty: bool = True, file: TextIO = _sys.stdout) -> None:
"""
Print the ``BeautifulSoup`` object, optionally prettified, for debugging purposes.
Parameters
----------
pretty : bool, optional
If True, the BeautifulSoup object will be prettified for better readability.
Defaults to True.
file : TextIO, optional
A file-like object to which the output will be printed.
Defaults to ``sys.stdout``.
Notes
-----
This method is intended for debugging and allows you to print the
current state of the ``BeautifulSoup`` object. The output can be customized
with the `pretty` parameter to control prettification and the `file`
parameter to redirect the output to a specific file-like object.
Example
-------
# Print the soup to the console standard output
>>> soup_instance.printsoup()
# Print and save the soup to the specified file
>>> with open('output.xml', 'w') as f:
... soup_instance.printsoup(pretty=True, file=f)
"""
print(self.soup.prettify() if pretty else str(self.soup).strip(), file=file)
[docs] def get(self, key: Union[str, List[str]]) -> Optional[_bs4.element.Tag]:
"""
Find the element tag based on the provided key, which can be a string
(separated by dots) or a list of tag names. The result could be a None,
this means that element are undefined or the users has specified wrong
element tree path.
Parameters
----------
key : str or a list of str
The key representing the element tree path.
Returns
-------
Tag or None :
A ``bs4.element.Tag`` object representing the desired element tag,
or ``None`` if the element tag is undefined or cannot be found.
"""
# Split the key if the key is a string
keys: List[str] = key.split('.') if isinstance(key, str) else key
# Find the element according to the first key
result: _bs4.element.Tag = self.soup.find(keys[0])
for k in keys[1:]:
# Break the loop if the result is None
if not result:
break
result = result.find(k)
return result
[docs] def get_name(self) -> Optional[str]:
"""Return the project name."""
# => project.name
name_element: _bs4.element.Tag = self.project_tag.find('name')
return name_element.text if name_element else name_element
[docs] def get_version(self) -> Optional[str]:
"""Return the project version."""
# => project.version
version_element: _bs4.element.Tag = self.project_tag.find('version')
return version_element.text if version_element else version_element
[docs] def get_id(self) -> Dict[str, Optional[str]]:
"""Return a dictionary with 'groupId' and 'artifactId'."""
id_element: List[Optional[_bs4.element.Tag]] = [
self.project_tag.find('groupId'), # => project.groupId
self.project_tag.find('artifactId') # => project.artifactId
]
return { # Return a dictionary
'groupId': id_element[0].text if id_element[0] else id_element[0],
'artifactId': id_element[1].text if id_element[1] else id_element[1]
}
[docs] def get_url(self) -> Optional[str]:
"""Return the project URL."""
# => project.url
url_element: _bs4.element.Tag = self.project_tag.find('url')
return url_element.text if url_element else url_element
[docs] def get_inception_year(self) -> Optional[str]:
"""Return the project inception year."""
# => project.inceptionYear
inc_year_element: _bs4.element.Tag = self.project_tag.find('inceptionYear')
return inc_year_element.text if inc_year_element else inc_year_element
[docs] def get_author(self) -> Dict[str, Optional[str]]:
"""Return a dictionary with 'id', 'name', and 'url' of the project author."""
key: str = 'project.developers.developer'
author_element: List[Optional[_bs4.element.Tag]] = [
self.get(key + '.id'), # => project.developers[0].developer.id
self.get(key + '.name'), # => project.developers[0].developer.name
self.get(key + '.url') # => project.developers[0].developer.url
]
return { # Return a dictionary
'id': author_element[0].text if author_element[0] else author_element[0],
'name': author_element[1].text if author_element[1] else author_element[1],
'url': author_element[2].text if author_element[2] else author_element[2],
}
[docs] def get_license(self) -> Dict[str, str]:
"""Return a dictionary with 'name', 'url', and 'distribution' of the project license."""
key: str = 'project.licenses.license'
license_element: List[Optional[_bs4.element.Tag]] = [
self.get(key + '.name'), # => project.licenses[0].license.name
self.get(key + '.url'), # => project.licenses[0].license.url
self.get(key + '.distribution') # => project.licenses[0].license.distribution
]
return {
'name': license_element[0].text if license_element[0] else license_element[0],
'url': license_element[1].text if license_element[1] else license_element[1],
'distribution': license_element[2].text if license_element[2] else license_element[2],
}
[docs] def get_property(self, key: str, dot: bool = True) -> Optional[str]:
"""
Return the value of the specified property key from the POM properties.
Parameters
----------
key : str
The property key.
dot : bool, optional
If True, split the key using dots. Defaults to True.
Returns
-------
str or None :
The property value if found, otherwise, returns None.
Raises
------
ValueError :
If the provided key is an empty string or None.
"""
# Raise an error if the provided key is an empty string or None
if not (key or len(key)):
raise ValueError('Key argument cannot be empty.')
# Remove the 'properties' string tag
key = key.replace('properties.', '') \
if key.startswith('properties.') else key
# Only split the dots if 'dot' argument enabled
keys: List[str] = key.split('.') if dot else [key]
# Add the prefix of 'properties' element tag
if not dot or (dot and keys[0] != 'properties'):
keys.insert(0, 'properties') # Append to the first index
# This way, we can prevent an error due to NoneType use
result: _bs4.element.Tag = self.get(keys)
return result.text if result else result
[docs]class JMRepairer:
"""
A class for repairing manifest and properties files using information
from a POM file.
Parameters
----------
pom : str, PomParser, or _bs4.BeautifulSoup
The POM file, either as a path (str), a `PomParser` instance,
or a `BeautifulSoup` object.
Raises
------
ValueError
If the 'pom' argument is empty.
TypeError
If the type of 'pom' argument is unknown, neither of str,
a `PomParser` instance, nor a `BeautifulSoup` object.
Attributes
----------
_val_pattern : re.Pattern
Regular expression pattern for extracting values from curly
braces in strings.
_soup : PomParser
Instance of `PomParser` representing the parsed POM file.
_pom_items : Dict[str, str (could possibly None)]
Dictionary containing key-value pairs extracted from the POM file.
"""
def __init__(self, pom: Union[str, PomParser, _bs4.BeautifulSoup]) -> 'JMRepairer':
"""Create a new instance of this class."""
if not pom:
raise ValueError("Argument 'pom' cannot be empty") \
from CORE_ERR
if not isinstance(pom, (str, PomParser, _bs4.BeautifulSoup)):
raise TypeError(f"Unknown type of 'pom' argument: {type(pom).__name__}") \
from CORE_ERR
self._val_pattern: _re.Pattern = _re.compile(r'\$\{([\w.-\[\]]+)\}')
self._soup: PomParser = None
if isinstance(pom, str):
self._soup = PomParser.parse(pom) # Need to be parsed first
elif isinstance(pom, _bs4.BeautifulSoup):
self._soup = PomParser(pom) # Pass directly to the constructor
elif isinstance(pom, PomParser):
self._soup = pom # Already an instance of PomParser
project_id: Dict[str, Optional[str]] = self._soup.get_id()
project_author: Dict[str, Optional[str]] = self._soup.get_author()
project_license: Dict[str, Optional[str]] = self._soup.get_license()
self._pom_items: Dict[str, Optional[str]] = {
'project.name': self._soup.get_name(),
'project.version': self._soup.get_version(),
'project.url': self._soup.get_url(),
'project.groupId': project_id['groupId'],
'project.artifactId': project_id['artifactId'],
'project.inceptionYear': self._soup.get_inception_year(),
'project.developers[0].name': project_author['name'],
'project.developers[0].url': project_author['url'],
'project.licenses[0].name': project_license['name'],
'project.licenses[0].url': project_license['url'],
'package.licenseFile': self._soup.get_property('package.licenseFile', dot=False),
'package.mainClass': self._soup.get_property('package.mainClass', dot=False),
'maven.build.timestamp': _dt.now(_tz.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
}
@classmethod
def __write_out(cls, contents: List[str], out: str) -> None:
"""
Write the given contents to the specified output file.
Parameters
----------
contents : a list of str
List of strings to be written to the file.
out : str
Path to the output file.
Raises
------
Exception
If an error occurs while writing to the output file.
"""
parentdir: str = _os.path.dirname(out)
if not _os.path.exists(parentdir):
_os.mkdir(parentdir)
try:
with open(out, 'w', encoding='UTF-8') as o_file:
for line in contents:
o_file.write(f'{line}{_os.linesep}')
except Exception as e:
raise e from CORE_ERR
[docs] def fix_manifest(self, infile: str, outfile: str = None) -> None:
"""
Fix the given manifest file by replacing placeholders with values
from the POM file.
Parameters
----------
infile : str
Path to the input manifest file.
outfile : str, optional
Path to the output manifest file. If not specified,
the input file will be overwritten.
Raises
------
ValueError
If the 'infile' argument is empty.
FileNotFoundError
If the specified input file does not exist.
"""
if not infile:
raise ValueError("Argument 'infile' cannot be empty") \
from CORE_ERR
if not _os.path.exists(infile):
raise FileNotFoundError(f'Cannot read non-existing file: {infile!r}') \
from CORE_ERR
# When outfile argument not specified, then use infile
# for the name of output file, which means will overwrite the infile
outfile = infile if not outfile else outfile
manifest: _jmutils.JMProperties = _jmutils.JMProperties(infile)
# Fix the manifest
for key, val in manifest.items():
new_val = self._val_pattern.match(val)
if not new_val:
continue
new_val = new_val[1]
if key == 'ID':
manifest[key] = f"{self._pom_items['project.groupId']}:" + \
f"{self._pom_items['project.artifactId']}"
elif new_val in self._pom_items:
manifest[key] = self._pom_items[new_val]
self.__write_out(
[f'{key}: {val}' for key, val in manifest.items()] + [''],
out=outfile
)
[docs] def fix_properties(self, infile: str, outfile: str = None) -> None:
"""
Fix the given properties file by replacing placeholders with values
from the POM file.
Parameters
----------
infile : str
Path to the input properties file.
outfile : str, optional
Path to the output properties file. If not specified,
the input file will be overwritten.
Raises
------
ValueError
If the 'infile' argument is empty.
FileNotFoundError
If the specified input file does not exist.
"""
if not infile:
raise ValueError("Argument 'infile' cannot be empty") \
from CORE_ERR
if not _os.path.exists(infile):
raise FileNotFoundError(f'Cannot read non-existing file: {infile!r}') \
from CORE_ERR
# If the outfile argument were not specified, then use infile
# for the name of output file, which means will overwrite the infile
outfile = infile if not outfile else outfile
# Parse the properties file
properties: _jmutils.JMProperties = _jmutils.JMProperties(infile)
# Fix the properties file
for key, val in properties.items():
new_val = self._val_pattern.match(val)
if not new_val:
continue
new_val = new_val[1]
if new_val in self._pom_items:
properties[key] = self._pom_items[new_val]
self.__write_out(
[f'{key} = {val}' for key, val in properties.items()],
out=outfile
)
__author__ = AUTHOR
__version__ = VERSION
__version_info__ = VERSION_INFO
# Delete unused variables
del AUTHOR, VERSION, VERSION_INFO
del Dict, List, Union, Optional, TextIO
if __name__ == '__main__':
__warn(
'''You are attempting to run this module directly (i.e. as main module), \
which is not permitted. It is designed to be imported, not as main module.''')
_sys.exit()