Source code for dstk.adaptors.typeguards

"""
Provides a set of type guard functions to safely and explicitly check the types of various token and workflow-related objects.

These functions help with runtime type checking and enable more precise type hinting and static analysis when working with linguistic data structures such as:

* POS-tagged word lists
* Collocates lists
* Sentences (token or string sequences)
* Workflow step definitions
* Token-based collocates

By using these type guards, code can branch safely based on the structure and types of input data, improving robustness and developer experience.

Example:

.. code-block:: python

    if is_pos_tags(tokens):
        # tokens is now narrowed to POSTaggedWordList type
        process_pos_tags(tokens)
"""

from typing import Any, TypeGuard
from ..lib_types import POSTaggedWordList, CollocatesList, Sentences, Token, Workflow, POSTaggedWord, Bigram, Collocates

[docs] def is_pos_tags(tokens: Any) -> TypeGuard[POSTaggedWordList]: """ Checks if the input is a list of POS-tagged words (POSTaggedWordList). :param tokens: The object to check. :type tokens: Any :return: True if `tokens` is a non-empty list where all elements are instances of POSTaggedWord, otherwise False. :rtype: bool """ if not isinstance(tokens, list) or not tokens: return False return all(isinstance(item, POSTaggedWord) for item in tokens)
[docs] def is_collocates(tokens: Any) -> TypeGuard[CollocatesList]: """ Checks if the input is a list of collocate tuples, where each tuple contains strings or Token instances, cexcluding types like POSTaggedWord or Bigram. :param tokens: The object to check. :type tokens: Any :return: True if `tokens` is a non-empty list of tuples of strings or Token instances (excluding POSTaggedWord and Bigram), otherwise False. :rtype: bool """ if not isinstance(tokens, list) or not tokens: return False return all( isinstance(item, tuple) and not isinstance(item, POSTaggedWord) and not isinstance(item, Bigram) and all( isinstance(word, str) or isinstance(word, Token) for word in item ) for item in tokens )
[docs] def is_sentence(tokens: Any) -> TypeGuard[Sentences]: """ Checks if the input is a list of sentences, where each sentence is either: * A list of Token instances, * A list of strings, or * A list of POSTaggedWord instances. :param tokens: The object to check. :type tokens: Any :return: True if `tokens` matches the described sentence structure, otherwise False. :rtype: bool """ if not isinstance(tokens, list) or not tokens: return False return ( isinstance(tokens, list) and all( ( isinstance(item, list) and ( all(isinstance(token, Token) for token in item) or all(isinstance(token, str) for token in item) ) ) or is_pos_tags(item) for item in tokens ) )
[docs] def is_workflow(workflow: Any) -> TypeGuard[Workflow]: """ Checks if the input is a workflow structure, i.e., a non-empty list of dictionaries where each dictionary maps string method names to argument dictionaries with string keys. :param workflow: The object to check. :type workflow: Any :return: True if `workflow` matches the workflow structure, otherwise False. :rtype: bool """ if not isinstance(workflow, list) or not workflow: return False return all( isinstance(method, dict) and all( isinstance(key, str) and isinstance(value, dict) and all( isinstance(arg, str) for arg in value.keys() ) for key, value in method.items() ) for method in workflow )
[docs] def is_token_collocates(collocates: Collocates) -> TypeGuard[Collocates[Token]]: """ Checks if the input collocates tuple consists exclusively of Token instances and excludes Bigram and POSTaggedWord types. :param collocates: The collocates tuple to check. :type collocates: Collocates :return: True if all elements in `collocates` are Token instances and not Bigram or POSTaggedWord, otherwise False. :rtype: bool """ return isinstance(collocates, tuple) and not isinstance(collocates, Bigram) and not isinstance(collocates, POSTaggedWord) and all( isinstance(token, Token) for token in collocates )