Source code for boolforge.generate.canalization

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This module provides functions for generating random Boolean functions and
Boolean networks with specified structural and dynamical properties.

The :mod:`~boolforge.generate` module enables the systematic creation of
Boolean functions and networks that satisfy particular constraints, such as
specified canalization depth, sensitivity range, bias, or connectivity.
Generated instances can be used for statistical analysis, benchmarking, or
simulation studies.

Several generation routines leverage Numba acceleration for efficient sampling
and evaluation of large function spaces. While Numba is **recommended** to
achieve near-native performance, it is **not required** for functionality; all
functions have pure Python fallbacks.

This module complements :mod:`~boolforge.boolean_function` and
:mod:`~boolforge.boolean_network` by facilitating reproducible generation of
synthetic test cases and large ensembles of random networks.

Example
-------
>>> import boolforge
>>> boolforge.random_function(n=3)
>>> boolforge.random_network(N=5, n=2)
"""

import math
import numpy as np

from ..boolean_function import BooleanFunction
from .. import utils

from .functions import random_function_with_bias
from .functions import random_non_degenerate_function


[docs]
def random_non_canalizing_function(
    n: int,
    bias: float = 0.5,
    *,
    rng=None,
) -> BooleanFunction:
    """
    Generate a random non-canalizing Boolean function.

    A Boolean function is canalizing if there exists at least one variable
    and a value of that variable such that fixing it forces the output of
    the function. This function samples Boolean functions from the
    Bernoulli(bias) ensemble until a non-canalizing function is obtained.

    Parameters
    ----------
    n : int
        Number of Boolean variables. Must satisfy ``n > 1``.
    bias : float, optional
        Probability that a truth-table entry equals 1. Default is 0.5.
    rng : int, np.random.Generator, np.random.RandomState, random.Random, or None, optional
        Random number generator or seed specification. Passed to
        ``utils._coerce_rng``.

    Returns
    -------
    BooleanFunction
        Random non-canalizing Boolean function on ``n`` variables.

    Raises
    ------
    ValueError
        If ``n`` is not an integer greater than 1.
    ValueError
        If ``bias`` is not strictly between 0 and 1.

    Notes
    -----
    - This function uses rejection sampling.
    - For moderate bias values, almost all Boolean functions are
      non-canalizing.
    - Extremely biased functions are more likely to be canalizing and may
      lead to longer sampling times.

    References
    ----------
    C. Kadelka, J. Kuipers, and R. Laubenbacher (2017).
    The influence of canalization on the robustness of Boolean networks.
    Physica D: Nonlinear Phenomena, 353, 39–47.
    """
    if not isinstance(n, (int, np.integer)) or n <= 1:
        raise ValueError("n must be an integer greater than 1")

    if not isinstance(bias, (float, np.floating)) or not (0.0 < bias < 1.0):
        raise ValueError("bias must be a float strictly between 0 and 1")

    rng = utils._coerce_rng(rng)

    # Rejection sampling; most Boolean functions are non-canalizing
    while True:
        f = random_function_with_bias(n, bias=bias, rng=rng)
        if not f.is_canalizing():
            return f




[docs]
def random_non_canalizing_non_degenerate_function(
    n: int,
    bias: float = 0.5,
    *,
    rng=None,
) -> BooleanFunction:
    """
    Generate a random Boolean function that is both non-canalizing and
    non-degenerate.

    A Boolean function is non-canalizing if no variable can force the output
    when fixed, and non-degenerate if every variable is essential. This
    function samples Boolean functions from the Bernoulli(bias) ensemble
    until both properties are satisfied.

    Parameters
    ----------
    n : int
        Number of Boolean variables. Must satisfy ``n > 1``.
    bias : float, optional
        Probability that a truth-table entry equals 1. Default is 0.5.
    rng : int, np.random.Generator, np.random.RandomState, random.Random, or None, optional
        Random number generator or seed specification. Passed to
        ``utils._coerce_rng``.

    Returns
    -------
    BooleanFunction
        Random Boolean function on ``n`` variables that is both non-canalizing
        and non-degenerate.

    Raises
    ------
    ValueError
        If ``n`` is not an integer greater than 1.
    ValueError
        If ``bias`` is not strictly between 0 and 1.

    Notes
    -----
    - This function uses rejection sampling.
    - For moderate bias values and sufficiently large ``n``, almost all
      Boolean functions are both non-canalizing and non-degenerate.
    - Extremely biased functions are more likely to be canalizing or
      degenerate and may lead to longer sampling times.

    References
    ----------
    C. Kadelka, J. Kuipers, and R. Laubenbacher (2017).
    The influence of canalization on the robustness of Boolean networks.
    Physica D: Nonlinear Phenomena, 353, 39–47.
    """
    if not isinstance(n, (int, np.integer)) or n <= 1:
        raise ValueError("n must be an integer greater than 1")

    if not isinstance(bias, (float, np.floating)) or not (0.0 < bias < 1.0):
        raise ValueError("bias must be a float strictly between 0 and 1")

    rng = utils._coerce_rng(rng)

    # Rejection sampling; almost all Boolean functions satisfy both properties
    while True:
        f = random_function_with_bias(n, bias=bias, rng=rng)
        if not f.is_canalizing() and not f.is_degenerate():
            return f


_uniform_over_functions_weights = {}

def _get_uniform_over_functions_weights(max_n, is_ncf=True):
    """
    Compute dynamic-programming weights for uniform canalized layer structures.

    This function constructs a dynamic-programming table ``W`` used to sample
    canalized output bitstrings with probability proportional to the inverse
    factorials of their layer sizes. The table encodes the total weight of all
    valid completions of a partially constructed canalized structure.

    Parameters
    ----------
    max_n : int
        Maximum total length of the canalized output string.
    is_ncf : bool, optional
        If True (default), enforce the nested canalizing function (NCF)
        constraint that the final canalizing layer has size at least 2.
        If False, no constraint is imposed on the final layer size.

    Returns
    -------
    W : ndarray of shape (max_n + 1, max_n + 2)
        Dynamic-programming weight table. ``W[m, s]`` gives the total weight of
        all valid completions with ``m`` positions remaining and current layer
        size ``s``.

    Notes
    -----
    The recursion is given by

        W[m, s] = W[m - 1, s + 1] + (1 / s!) * W[m - 1, 1],

    corresponding to either extending the current canalizing layer or
    terminating it and starting a new layer of size 1. The base case ``m = 0``
    accounts for the weight of the final layer and enforces the optional NCF
    constraint.

    Results are cached internally to avoid recomputation for repeated calls
    with the same ``max_n`` and ``NCF`` values.
    """
    if (max_n,is_ncf) in _uniform_over_functions_weights:
        return _uniform_over_functions_weights[(max_n,is_ncf)]
    
    W = np.zeros((max_n + 1, max_n + 2), dtype=float)

    # Base case: no positions left -> close final run
    inv_factorial_of_s = 1.0
    for s in range(1, max_n + 2):
        inv_factorial_of_s /= s
        if (not is_ncf) or (s >= 2):
            W[0, s] = inv_factorial_of_s
        else:
            W[0, s] = 0.0

    # DP
    for m in range(1, max_n + 1):
        inv_factorial_of_s = 1.0
        for s in range(1, max_n + 1):
            inv_factorial_of_s /= s
            W[m, s] = (
                W[m - 1, s + 1] +
                inv_factorial_of_s * W[m - 1, 1]
            )
    
    _uniform_over_functions_weights[(max_n,is_ncf)] = W    
    return W


def sample_canalized_outputs_uniform_over_functions(n, W, *, rng):
    """
    Sample a canalized output bitstring yielding uniform layer-structure weighting.

    This function samples a binary vector ``b`` of length ``n`` representing
    canalized output values, where consecutive equal values are part of the same
    canalizing layer. The probability of a given layer structure
    ``(k_1, k_2, ..., k_r)`` is proportional to

        1 / (k_1! k_2! ... k_r!).
        
    Sampling is performed sequentially using precomputed dynamic-programming
    weights ``W``, stored in _uniform_over_functions_weights.

    Parameters
    ----------
    n : int
        Length of the output bitstring to sample.
    W : ndarray of shape (n+1, n+1)
        Dynamic-programming weight table, where ``W[m, s]`` gives the total
        weight of all valid completions with ``m`` positions remaining and
        current layer size ``s``.
    rng : numpy.random.Generator
        Random number generator used for sampling.

    Returns
    -------
    b : ndarray of shape (n,), dtype int
        Sampled binary canalized output vector.

    Notes
    -----
    The bitstring is generated left-to-right. At each step, the algorithm
    probabilistically chooses whether to extend the current layer or start a
    new one, using the weights in ``W`` to ensure correct global sampling
    probabilities. The first bit is chosen uniformly at random.
    """
    rng = utils._coerce_rng(rng)

    b = np.zeros(n, dtype=int)

    # Randomly pick first canalized output
    b[0] = rng.integers(2)

    s = 1              # current layer size
    inv_factorial_of_s = 1.0
    m = n - 1          # positions remaining to fill

    for i in range(1, n):

        # DP-weighted decision
        w_extend = W[m - 1, s + 1]
        w_split  = inv_factorial_of_s * W[m - 1, 1]

        p_extend = w_extend / (w_extend + w_split)
        if rng.random() < p_extend:
            b[i] = b[i - 1]
            s += 1
            inv_factorial_of_s /= s
        else:
            b[i] = 1 - b[i - 1]
            s = 1
            inv_factorial_of_s = 1.0

        m -= 1

    return b


[docs]
def random_k_canalizing_function(
    n: int,
    k: int,
    exact_depth: bool = False,
    uniform_over_functions: bool = True,
    *,
    rng=None,
) -> BooleanFunction:
    """
    Generate a random k-canalizing Boolean function in n variables.

    A Boolean function is k-canalizing if it has at least k conditionally 
    canalizing variables. If ``exact_depth`` is True, the function has exactly
    k conditionally canalizing variables; otherwise, its canalizing depth 
    may exceed k.

    Parameters
    ----------
    n : int
        Number of Boolean variables.
    k : int
        Requested canalizing depth. Must satisfy ``0 <= k <= n``.
        Setting ``k = n`` generates a nested canalizing function.
    exact_depth : bool, optional
        If True, enforce that the canalizing depth is exactly ``k``.
        If False (default), the depth is at least ``k``.
    uniform_over_functions : bool, optional
        If True (default), the function is sampled uniformly at random
        from the set of Boolean functions consistent with the specified
        constraints (n, k, exact_depth).
        
        Internally, this is achieved by a rejection sampling scheme that
        compensates for all combinatorial multiplicities arising from
        symmetric canalizing layers and possible merges between the
        outer canalizing layers and the core function.
    
        If False, canalized outputs are sampled independently as a bitstring,
        and no rejection correction is applied. In this case, the resulting
        distribution is biased toward Boolean functions with more symmetric
        canalizing structures.
    rng : int, numpy.random.Generator, numpy.random.RandomState, random.Random, or None, optional
        Random number generator or seed specification. Passed to
        ``utils._coerce_rng``.

    Returns
    -------
    BooleanFunction
        A Boolean function on ``n`` variables with canalizing depth at
        least ``k`` (or exactly ``k`` if ``exact_depth=True``).

    Raises
    ------
    AssertionError
        If ``n`` is not a positive integer.
    AssertionError
        If ``k`` does not satisfy ``0 <= k <= n``.
    AssertionError
        If ``exact_depth=True`` and ``k = n-1`` (no such functions exist).

    Notes
    -----
    When uniform_over_functions=True, this function samples uniformly from the
    space of Boolean functions with the specified canalizing properties.
    As a consequence, different canalizing layer structures generally
    occur with different frequencies, reflecting the fact that they
    support different numbers of Boolean functions.
    
    Uniformity over canalizing layer structures is not enforced and is
    not expected.

    The construction follows the standard decomposition of a k-canalizing
    function into canalizing variables, canalizing inputs and outputs, and
    a residual core function on ``n-k`` variables.

    References
    ----------
    He, Q., and Macauley, M. (2016).
        Stratification and enumeration of Boolean functions by canalizing depth.
        Physica D: Nonlinear Phenomena, 314, 1–8.

    Dimitrova, E., Stigler, B., Kadelka, C., and Murrugarra, D. (2022).
        Revealing the canalizing structure of Boolean functions: Algorithms
        and applications. Automatica, 146, 110630.
    """
    rng = utils._coerce_rng(rng)

    assert isinstance(n, (int, np.integer)) and n > 0, "n must be a positive integer"
    assert n - k != 1 or not exact_depth, (
        "There are no functions of exact canalizing depth n-1.\nEither set exact_depth=False or ensure k != n-1"
    )
    assert isinstance(k, (int, np.integer)) and 0 <= k and k <= n, (
        "k, the canalizing depth, must satisfy 0 <= k <= n."
    )
    
    if k==0:
        if exact_depth:
            if n == 1:
                raise ValueError(
                    "No Boolean functions with canalizing depth 0 exist for n = 1."
                )
            return random_non_canalizing_non_degenerate_function(n, rng=rng)
        else:
            return random_non_degenerate_function(n, rng=rng)
    elif k==n-1 and n>1: #canalizing functions with depth n-1>0 really have depth n 
        k=n

    # Step 1: canalizing inputs and variables
    aas = rng.integers(2, size=k)
    can_vars = rng.choice(n, k, replace=False)

    while True: 
        #include the generation of canalized outputs in the rejection sampling scheme
        #because some output vectors `bbs` may give rise to more distinct functions than others
        
        # Step 2: canalized outputs, determining layers
        if uniform_over_functions:
            bbs = sample_canalized_outputs_uniform_over_functions(
                k,
                _get_uniform_over_functions_weights(k, is_ncf=(k >= n)),
                rng=rng,
            )
        else:
            bbs = rng.integers(2, size=k)
            
        # Step 3: sample core function using efficient rejection sampling
        if k < n:
            core_function = random_k_canalizing_function(
                n=n - k,
                k=0,
                exact_depth=exact_depth,#True if exact_depth or uniform_over_functions else False,
                rng=rng,
            )
            
            if exact_depth or k==0 or not uniform_over_functions:
                break
            else:
                #check if the core function is canalizing and correct for combinatorial
                #bias in the selection of the final functions
                
                #compute canalizing info of core_function, stored in f.properties
                core_function.get_layer_structure() 
                if core_function.properties['CanalizingDepth'] == 0:
                    break
                else: #rejection sampling is efficient because it happens with probability <= 50%
                    bbs_core = core_function.properties['CanalizedOutputs']
                    if bbs_core[0] != bbs[-1]:  
                        #no combinatorial explosion, the core varaibles start a new layer
                        break
                    else: 
                        # merge occurs: last layer grows
                        
                        # determine size of last outer layer in bbs
                        s = 1
                        for i in range(k - 1, 0, -1):
                            if bbs[i] == bbs[i - 1]:
                                s += 1
                            else:
                                break
                            
                        # determine number of additional variables in the same layer
                        s_core = 1
                        for i in range(core_function.properties['CanalizingDepth'] - 1):
                            if bbs_core[i] == bbs_core[i + 1]:
                                s_core += 1
                            else:
                                break                    
                        
                        accept_prob = 1 / math.comb(s + s_core, s)
                        
                        if rng.random() <= accept_prob:
                            break
        else:
            core_function = [1 - bbs[-1]]
            break
        
    # Step 4: build truth table and return canalizing Boolean function
    left_side_of_truth_table = utils.get_left_side_of_truth_table(n)
    f = np.full(2**n, -1, dtype=np.int8)
    
    for j in range(k):
        mask = (left_side_of_truth_table[:, can_vars[j]] == aas[j]) & (f < 0)
        f[mask] = bbs[j]
        
    # fill remaining with core truth table
    f[f < 0] = np.asarray(core_function, dtype=np.int8)

    return BooleanFunction._from_f_unchecked(f)




[docs]
def random_k_canalizing_function_with_specific_layer_structure(
    n: int,
    layer_structure: list,
    exact_depth: bool = False,
    *,
    rng=None,
) -> BooleanFunction:
    """
    Generate a random Boolean function with a specified canalizing layer structure.

    The canalizing layer structure is given as a list
    ``[k_1, ..., k_r]``, where each ``k_i`` specifies the number of
    canalizing variables in the i-th layer. The total canalizing depth is
    ``sum(layer_structure)``.

    If ``sum(layer_structure) == n`` and ``n > 1``, the function is a nested
    canalizing function and the final layer is required to have size at
    least 2.

    Parameters
    ----------
    n : int
        Total number of Boolean variables.
    layer_structure : list of int
        Canalizing layer structure ``[k_1, ..., k_r]``. Each entry must be
        at least 1. If ``sum(layer_structure) == n`` and ``n > 1``, the final
        entry must satisfy ``layer_structure[-1] >= 2``.
    exact_depth : bool, optional
        If True, enforce that the canalizing depth is exactly
        ``sum(layer_structure)``. If False (default), additional canalizing
        variables may occur in the core function.
    rng : int, numpy.random.Generator, numpy.random.RandomState, random.Random, or None, optional
        Random number generator or seed specification. Passed to
        ``utils._coerce_rng``.

    Returns
    -------
    BooleanFunction
        A Boolean function on ``n`` variables with the prescribed canalizing
        layer structure, plus potentially more canalizing variables (if exact_depth=True).

    Raises
    ------
    AssertionError
        If ``n`` is not a positive integer.
    AssertionError
        If ``sum(layer_structure)`` does not satisfy ``0 <= sum(layer_structure) <= n``.
    AssertionError
        If ``exact_depth=True`` and ``sum(layer_structure) = n - 1``.
    AssertionError
        If ``sum(layer_structure) = n > 1`` and the final layer has size less
        than 2.
    AssertionError
        If any entry of ``layer_structure`` is less than 1.

    Notes
    -----
    For fixed parameter values, this function samples uniformly at random
    from the ensemble of Boolean functions consistent with the specified
    canalizing layer structure and additional constraints.

    The construction follows the standard decomposition of a canalizing
    function into ordered canalizing layers and a residual core function on
    the remaining variables.

    References
    ----------
    He, Q., and Macauley, M. (2016).
        Stratification and enumeration of Boolean functions by canalizing depth.
        Physica D: Nonlinear Phenomena, 314, 1–8.

    Kadelka, C., Kuipers, J., and Laubenbacher, R. (2017).
        The influence of canalization on the robustness of Boolean networks.
        Physica D: Nonlinear Phenomena, 353, 39–47.
    """
    rng = utils._coerce_rng(rng)
    depth = sum(layer_structure)  # canalizing depth
    if depth == 0:
        layer_structure = [0]

    assert isinstance(n, (int, np.integer)) and n > 0, "n must be an integer > 0"
    assert n - depth != 1 or not exact_depth, (
        "There are no functions of exact canalizing depth n-1.\nEither set exact_depth=False or ensure depth=sum(layer_structure)!=n-1."
    )
    assert 0 <= depth and depth <= n, "Ensure 0 <= depth = sum(layer_structure) <= n."
    assert depth < n or layer_structure[-1] > 1 or n == 1, (
        "The last layer of an NCF (i.e., an n-canalizing function) has to have size >= 2 whenever n > 1.\nIf depth=sum(layer_structure)=n, ensure that layer_structure[-1]>=2."
    )
    assert min(layer_structure) >= 1, (
        "Each layer must have at least one variable (each element of layer_structure must be >= 1)."
    )

    size_state_space = 2**n
    aas = rng.integers(2, size=depth)  # canalizing inputs
    b0 = rng.integers(2)
    bbs = [b0] * layer_structure[0]  # canalized outputs for first layer
    for i in range(1, len(layer_structure)):
        if i % 2 == 0:
            bbs.extend([b0] * layer_structure[i])
        else:
            bbs.extend([1 - b0] * layer_structure[i])
    can_vars = rng.choice(n, depth, replace=False)
    f = np.zeros(size_state_space, dtype=int)
    if depth < n:
        core_function = random_k_canalizing_function(
            n=n - depth,
            k=0,
            exact_depth=exact_depth,
            rng=rng,
        )
    else:
        core_function = [1 - bbs[-1]]

    left_side_of_truth_table = utils.get_left_side_of_truth_table(n)
    f = np.full(2**n, -1, dtype=np.int8)
    for j in range(depth):
        mask = (left_side_of_truth_table[:, can_vars[j]] == aas[j]) & (f < 0)
        f[mask] = bbs[j]
    # fill remaining with core truth table
    f[f < 0] = np.asarray(core_function, dtype=np.int8)

    return BooleanFunction._from_f_unchecked(f)




[docs]
def random_NCF(
    n: int,
    uniform_over_functions: bool = True,
    layer_structure: list | None = None,
    *,
    rng=None
) -> BooleanFunction:
    """
    Generate a random nested canalizing Boolean function in n variables.

    A nested canalizing function (NCF) is an n-canalizing Boolean function,
    i.e., a function whose canalizing depth equals the number of variables.
    Optionally, a specific canalizing layer structure may be prescribed.

    Parameters
    ----------
    n : int
        Total number of Boolean variables.
    uniform_over_functions : bool, optional
        If True (default) and ``layer_structure`` is None, canalizing layer
        structures are sampled uniformly at random, removing the bias toward
        symmetric structures induced by independent sampling of canalized
        outputs. If False, canalized outputs are sampled independently and
        uniformly as a bitstring, which biases the distribution toward more
        symmetric layer structures.
        This parameter is ignored if ``layer_structure`` is provided.
    layer_structure : list of int or None, optional
        Canalizing layer structure ``[k_1, ..., k_r]``. Each entry must be at
        least 1. If provided, it must satisfy ``sum(layer_structure) == n``.
        If ``n > 1``, the final entry must satisfy
        ``layer_structure[-1] >= 2``. If None (default), the layer structure
        is sampled at random.
    rng : int, numpy.random.Generator, numpy.random.RandomState, random.Random, or None, optional
        Random number generator or seed specification. Passed to
        ``utils._coerce_rng``.

    Returns
    -------
    BooleanFunction
        A nested canalizing Boolean function on ``n`` variables.

    Raises
    ------
    AssertionError
        If ``n`` is not a positive integer.
    AssertionError
        If ``layer_structure`` is provided but does not satisfy
        ``sum(layer_structure) == n``.
    AssertionError
        If ``n > 1`` and the final layer has size less than 2.

    Notes
    -----
    For fixed parameter values, this function samples uniformly at random
    from the ensemble of nested canalizing Boolean functions consistent with
    the specified constraints. Non-uniformity arises only when
    ``uniform_over_functions=False``.

    This function is a convenience wrapper around
    ``random_k_canalizing_function`` and
    ``random_k_canalizing_function_with_specific_layer_structure``.

    References
    ----------
    He, Q., and Macauley, M. (2016).
        Stratification and enumeration of Boolean functions by canalizing depth.
        Physica D: Nonlinear Phenomena, 314, 1–8.

    Kadelka, C., Kuipers, J., and Laubenbacher, R. (2017).
        The influence of canalization on the robustness of Boolean networks.
        Physica D: Nonlinear Phenomena, 353, 39–47.
    """
    rng = utils._coerce_rng(rng)
    if layer_structure is None:
        return random_k_canalizing_function(n, 
                                            n, 
                                            uniform_over_functions=uniform_over_functions,
                                            exact_depth=False, 
                                            rng=rng)
    else:
        assert sum(layer_structure) == n, "Ensure sum(layer_structure) == n."
        assert layer_structure[-1] > 1 or n == 1, (
            "The last layer of an NCF has to have size >= 2 whenever n > 1.\nEnsure that layer_structure[-1]>=2."
        )
        return random_k_canalizing_function_with_specific_layer_structure(
            n, 
            layer_structure, 
            exact_depth=False, 
            rng=rng
        )