Source code for relaton.serializers.bibxml.anchor

import re
from typing import List, Tuple, Callable, Optional

from ...models.bibdata import BibliographicItem, DocID
from ...util import as_list

__all__ = (
    'get_suitable_anchor',
    'to_valid_xsid',
    'XSID_REGEX',
    'XSID_ILLEGAL',
    'ANCHOR_FORMATTERS',
)


[docs]def get_suitable_anchor(item: BibliographicItem) -> str: """From a :class:`~relaton.models.bibdata.BibliographicItem` instance get best anchor value and return it as a string. Tries :data:`~.ANCHOR_FORMATTERS`, and if none return a string then takes the first primary ``docid``, (or the first ``docid`` with ``scope`` equal to “anchor”, or just the first docid). Ensures the value matches XSID schema. :param item: a :class:`bib_models.bibdata.BibliographicItem` instance :returns str: a string to be used as anchor :rtype: str :raises ValueError: unable to obtain an anchor, e.g. item has no docids """ docids: List[DocID] = as_list(item.docid or []) try: anchor_docid: str = ( # Prefer bespoke [custom_anchor for d in docids for formatter in ANCHOR_FORMATTERS if (custom_anchor := formatter(d))] # Otherwise, prefer primary or [d.id for d in docids if d.primary and XSID_REGEX.match(d.id) is not None] # Fallback case (docid.scope may be going away) or [d.id for d in docids if getattr(d, 'scope', '') == 'anchor' and XSID_REGEX.match(d.id) is not None] # Otherwise, take any docid or [d.id for d in docids])[0] except IndexError: raise ValueError("No suitable anchor could be determined") else: if XSID_REGEX.match(anchor_docid) is not None: return anchor_docid else: return to_valid_xsid(anchor_docid)
[docs]def to_valid_xsid(val: str) -> str: """ Transforms a string into a valid xs:id value. Transformation is lossy and irreversible. """ return XSID_ILLEGAL.sub('', re.sub( r'^\d', r'_\g<0>', re.sub( r'[-\s]+', '_', val .replace('/', '-') .replace(':', '.') .strip('-_') ) ))
XSID_REGEX = re.compile(r'^[a-zA-Z_][-.\w]*$') """A regular expression matching a full valid xs:id value.""" XSID_ILLEGAL = re.compile(r'[^-.\w]') """A regular expression matching xs:id characters that are invalid anywhere within an xs:id string.""" ANCHOR_FORMATTERS: Tuple[Callable[[DocID], Optional[str]]] = ( (lambda docid: f"RFC{docid.id.split(' ')[1].zfill(4)}" if all([ docid.primary, docid.type == 'IETF', docid.id.startswith('RFC '), ]) else None), ) """Custom anchor formatters. Each function must take a :class:`relaton.models.bibdata.DocID` instance and produce either an anchor string or ``None``. """