#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This module provides functions for generating random Boolean functions and
Boolean networks with specified structural and dynamical properties.
The :mod:`~boolforge.generate` module enables the systematic creation of
Boolean functions and networks that satisfy particular constraints, such as
specified canalization depth, sensitivity range, bias, or connectivity.
Generated instances can be used for statistical analysis, benchmarking, or
simulation studies.
Several generation routines leverage Numba acceleration for efficient sampling
and evaluation of large function spaces. While Numba is **recommended** to
achieve near-native performance, it is **not required** for functionality; all
functions have pure Python fallbacks.
This module complements :mod:`~boolforge.boolean_function` and
:mod:`~boolforge.boolean_network` by facilitating reproducible generation of
synthetic test cases and large ensembles of random networks.
Example
-------
>>> import boolforge
>>> boolforge.random_function(n=3)
>>> boolforge.random_network(N=5, n=2)
"""
import math
import numpy as np
from ..boolean_function import BooleanFunction
from .. import utils
from .functions import random_function_with_bias
from .functions import random_non_degenerate_function
[docs]
def random_non_canalizing_function(
n: int,
bias: float = 0.5,
*,
rng=None,
) -> BooleanFunction:
"""
Generate a random non-canalizing Boolean function.
A Boolean function is canalizing if there exists at least one variable
and a value of that variable such that fixing it forces the output of
the function. This function samples Boolean functions from the
Bernoulli(bias) ensemble until a non-canalizing function is obtained.
Parameters
----------
n : int
Number of Boolean variables. Must satisfy ``n > 1``.
bias : float, optional
Probability that a truth-table entry equals 1. Default is 0.5.
rng : int, np.random.Generator, np.random.RandomState, random.Random, or None, optional
Random number generator or seed specification. Passed to
``utils._coerce_rng``.
Returns
-------
BooleanFunction
Random non-canalizing Boolean function on ``n`` variables.
Raises
------
ValueError
If ``n`` is not an integer greater than 1.
ValueError
If ``bias`` is not strictly between 0 and 1.
Notes
-----
- This function uses rejection sampling.
- For moderate bias values, almost all Boolean functions are
non-canalizing.
- Extremely biased functions are more likely to be canalizing and may
lead to longer sampling times.
References
----------
C. Kadelka, J. Kuipers, and R. Laubenbacher (2017).
The influence of canalization on the robustness of Boolean networks.
Physica D: Nonlinear Phenomena, 353, 39–47.
"""
if not isinstance(n, (int, np.integer)) or n <= 1:
raise ValueError("n must be an integer greater than 1")
if not isinstance(bias, (float, np.floating)) or not (0.0 < bias < 1.0):
raise ValueError("bias must be a float strictly between 0 and 1")
rng = utils._coerce_rng(rng)
# Rejection sampling; most Boolean functions are non-canalizing
while True:
f = random_function_with_bias(n, bias=bias, rng=rng)
if not f.is_canalizing():
return f
[docs]
def random_non_canalizing_non_degenerate_function(
n: int,
bias: float = 0.5,
*,
rng=None,
) -> BooleanFunction:
"""
Generate a random Boolean function that is both non-canalizing and
non-degenerate.
A Boolean function is non-canalizing if no variable can force the output
when fixed, and non-degenerate if every variable is essential. This
function samples Boolean functions from the Bernoulli(bias) ensemble
until both properties are satisfied.
Parameters
----------
n : int
Number of Boolean variables. Must satisfy ``n > 1``.
bias : float, optional
Probability that a truth-table entry equals 1. Default is 0.5.
rng : int, np.random.Generator, np.random.RandomState, random.Random, or None, optional
Random number generator or seed specification. Passed to
``utils._coerce_rng``.
Returns
-------
BooleanFunction
Random Boolean function on ``n`` variables that is both non-canalizing
and non-degenerate.
Raises
------
ValueError
If ``n`` is not an integer greater than 1.
ValueError
If ``bias`` is not strictly between 0 and 1.
Notes
-----
- This function uses rejection sampling.
- For moderate bias values and sufficiently large ``n``, almost all
Boolean functions are both non-canalizing and non-degenerate.
- Extremely biased functions are more likely to be canalizing or
degenerate and may lead to longer sampling times.
References
----------
C. Kadelka, J. Kuipers, and R. Laubenbacher (2017).
The influence of canalization on the robustness of Boolean networks.
Physica D: Nonlinear Phenomena, 353, 39–47.
"""
if not isinstance(n, (int, np.integer)) or n <= 1:
raise ValueError("n must be an integer greater than 1")
if not isinstance(bias, (float, np.floating)) or not (0.0 < bias < 1.0):
raise ValueError("bias must be a float strictly between 0 and 1")
rng = utils._coerce_rng(rng)
# Rejection sampling; almost all Boolean functions satisfy both properties
while True:
f = random_function_with_bias(n, bias=bias, rng=rng)
if not f.is_canalizing() and not f.is_degenerate():
return f
_uniform_over_functions_weights = {}
def _get_uniform_over_functions_weights(max_n, is_ncf=True):
"""
Compute dynamic-programming weights for uniform canalized layer structures.
This function constructs a dynamic-programming table ``W`` used to sample
canalized output bitstrings with probability proportional to the inverse
factorials of their layer sizes. The table encodes the total weight of all
valid completions of a partially constructed canalized structure.
Parameters
----------
max_n : int
Maximum total length of the canalized output string.
is_ncf : bool, optional
If True (default), enforce the nested canalizing function (NCF)
constraint that the final canalizing layer has size at least 2.
If False, no constraint is imposed on the final layer size.
Returns
-------
W : ndarray of shape (max_n + 1, max_n + 2)
Dynamic-programming weight table. ``W[m, s]`` gives the total weight of
all valid completions with ``m`` positions remaining and current layer
size ``s``.
Notes
-----
The recursion is given by
W[m, s] = W[m - 1, s + 1] + (1 / s!) * W[m - 1, 1],
corresponding to either extending the current canalizing layer or
terminating it and starting a new layer of size 1. The base case ``m = 0``
accounts for the weight of the final layer and enforces the optional NCF
constraint.
Results are cached internally to avoid recomputation for repeated calls
with the same ``max_n`` and ``NCF`` values.
"""
if (max_n,is_ncf) in _uniform_over_functions_weights:
return _uniform_over_functions_weights[(max_n,is_ncf)]
W = np.zeros((max_n + 1, max_n + 2), dtype=float)
# Base case: no positions left -> close final run
inv_factorial_of_s = 1.0
for s in range(1, max_n + 2):
inv_factorial_of_s /= s
if (not is_ncf) or (s >= 2):
W[0, s] = inv_factorial_of_s
else:
W[0, s] = 0.0
# DP
for m in range(1, max_n + 1):
inv_factorial_of_s = 1.0
for s in range(1, max_n + 1):
inv_factorial_of_s /= s
W[m, s] = (
W[m - 1, s + 1] +
inv_factorial_of_s * W[m - 1, 1]
)
_uniform_over_functions_weights[(max_n,is_ncf)] = W
return W
def sample_canalized_outputs_uniform_over_functions(n, W, *, rng):
"""
Sample a canalized output bitstring yielding uniform layer-structure weighting.
This function samples a binary vector ``b`` of length ``n`` representing
canalized output values, where consecutive equal values are part of the same
canalizing layer. The probability of a given layer structure
``(k_1, k_2, ..., k_r)`` is proportional to
1 / (k_1! k_2! ... k_r!).
Sampling is performed sequentially using precomputed dynamic-programming
weights ``W``, stored in _uniform_over_functions_weights.
Parameters
----------
n : int
Length of the output bitstring to sample.
W : ndarray of shape (n+1, n+1)
Dynamic-programming weight table, where ``W[m, s]`` gives the total
weight of all valid completions with ``m`` positions remaining and
current layer size ``s``.
rng : numpy.random.Generator
Random number generator used for sampling.
Returns
-------
b : ndarray of shape (n,), dtype int
Sampled binary canalized output vector.
Notes
-----
The bitstring is generated left-to-right. At each step, the algorithm
probabilistically chooses whether to extend the current layer or start a
new one, using the weights in ``W`` to ensure correct global sampling
probabilities. The first bit is chosen uniformly at random.
"""
rng = utils._coerce_rng(rng)
b = np.zeros(n, dtype=int)
# Randomly pick first canalized output
b[0] = rng.integers(2)
s = 1 # current layer size
inv_factorial_of_s = 1.0
m = n - 1 # positions remaining to fill
for i in range(1, n):
# DP-weighted decision
w_extend = W[m - 1, s + 1]
w_split = inv_factorial_of_s * W[m - 1, 1]
p_extend = w_extend / (w_extend + w_split)
if rng.random() < p_extend:
b[i] = b[i - 1]
s += 1
inv_factorial_of_s /= s
else:
b[i] = 1 - b[i - 1]
s = 1
inv_factorial_of_s = 1.0
m -= 1
return b
[docs]
def random_k_canalizing_function(
n: int,
k: int,
exact_depth: bool = False,
uniform_over_functions: bool = True,
*,
rng=None,
) -> BooleanFunction:
"""
Generate a random k-canalizing Boolean function in n variables.
A Boolean function is k-canalizing if it has at least k conditionally
canalizing variables. If ``exact_depth`` is True, the function has exactly
k conditionally canalizing variables; otherwise, its canalizing depth
may exceed k.
Parameters
----------
n : int
Number of Boolean variables.
k : int
Requested canalizing depth. Must satisfy ``0 <= k <= n``.
Setting ``k = n`` generates a nested canalizing function.
exact_depth : bool, optional
If True, enforce that the canalizing depth is exactly ``k``.
If False (default), the depth is at least ``k``.
uniform_over_functions : bool, optional
If True (default), the function is sampled uniformly at random
from the set of Boolean functions consistent with the specified
constraints (n, k, exact_depth).
Internally, this is achieved by a rejection sampling scheme that
compensates for all combinatorial multiplicities arising from
symmetric canalizing layers and possible merges between the
outer canalizing layers and the core function.
If False, canalized outputs are sampled independently as a bitstring,
and no rejection correction is applied. In this case, the resulting
distribution is biased toward Boolean functions with more symmetric
canalizing structures.
rng : int, numpy.random.Generator, numpy.random.RandomState, random.Random, or None, optional
Random number generator or seed specification. Passed to
``utils._coerce_rng``.
Returns
-------
BooleanFunction
A Boolean function on ``n`` variables with canalizing depth at
least ``k`` (or exactly ``k`` if ``exact_depth=True``).
Raises
------
AssertionError
If ``n`` is not a positive integer.
AssertionError
If ``k`` does not satisfy ``0 <= k <= n``.
AssertionError
If ``exact_depth=True`` and ``k = n-1`` (no such functions exist).
Notes
-----
When uniform_over_functions=True, this function samples uniformly from the
space of Boolean functions with the specified canalizing properties.
As a consequence, different canalizing layer structures generally
occur with different frequencies, reflecting the fact that they
support different numbers of Boolean functions.
Uniformity over canalizing layer structures is not enforced and is
not expected.
The construction follows the standard decomposition of a k-canalizing
function into canalizing variables, canalizing inputs and outputs, and
a residual core function on ``n-k`` variables.
References
----------
He, Q., and Macauley, M. (2016).
Stratification and enumeration of Boolean functions by canalizing depth.
Physica D: Nonlinear Phenomena, 314, 1–8.
Dimitrova, E., Stigler, B., Kadelka, C., and Murrugarra, D. (2022).
Revealing the canalizing structure of Boolean functions: Algorithms
and applications. Automatica, 146, 110630.
"""
rng = utils._coerce_rng(rng)
assert isinstance(n, (int, np.integer)) and n > 0, "n must be a positive integer"
assert n - k != 1 or not exact_depth, (
"There are no functions of exact canalizing depth n-1.\nEither set exact_depth=False or ensure k != n-1"
)
assert isinstance(k, (int, np.integer)) and 0 <= k and k <= n, (
"k, the canalizing depth, must satisfy 0 <= k <= n."
)
if k==0:
if exact_depth:
if n == 1:
raise ValueError(
"No Boolean functions with canalizing depth 0 exist for n = 1."
)
return random_non_canalizing_non_degenerate_function(n, rng=rng)
else:
return random_non_degenerate_function(n, rng=rng)
elif k==n-1 and n>1: #canalizing functions with depth n-1>0 really have depth n
k=n
# Step 1: canalizing inputs and variables
aas = rng.integers(2, size=k)
can_vars = rng.choice(n, k, replace=False)
while True:
#include the generation of canalized outputs in the rejection sampling scheme
#because some output vectors `bbs` may give rise to more distinct functions than others
# Step 2: canalized outputs, determining layers
if uniform_over_functions:
bbs = sample_canalized_outputs_uniform_over_functions(
k,
_get_uniform_over_functions_weights(k, is_ncf=(k >= n)),
rng=rng,
)
else:
bbs = rng.integers(2, size=k)
# Step 3: sample core function using efficient rejection sampling
if k < n:
core_function = random_k_canalizing_function(
n=n - k,
k=0,
exact_depth=exact_depth,#True if exact_depth or uniform_over_functions else False,
rng=rng,
)
if exact_depth or k==0 or not uniform_over_functions:
break
else:
#check if the core function is canalizing and correct for combinatorial
#bias in the selection of the final functions
#compute canalizing info of core_function, stored in f.properties
core_function.get_layer_structure()
if core_function.properties['CanalizingDepth'] == 0:
break
else: #rejection sampling is efficient because it happens with probability <= 50%
bbs_core = core_function.properties['CanalizedOutputs']
if bbs_core[0] != bbs[-1]:
#no combinatorial explosion, the core varaibles start a new layer
break
else:
# merge occurs: last layer grows
# determine size of last outer layer in bbs
s = 1
for i in range(k - 1, 0, -1):
if bbs[i] == bbs[i - 1]:
s += 1
else:
break
# determine number of additional variables in the same layer
s_core = 1
for i in range(core_function.properties['CanalizingDepth'] - 1):
if bbs_core[i] == bbs_core[i + 1]:
s_core += 1
else:
break
accept_prob = 1 / math.comb(s + s_core, s)
if rng.random() <= accept_prob:
break
else:
core_function = [1 - bbs[-1]]
break
# Step 4: build truth table and return canalizing Boolean function
left_side_of_truth_table = utils.get_left_side_of_truth_table(n)
f = np.full(2**n, -1, dtype=np.int8)
for j in range(k):
mask = (left_side_of_truth_table[:, can_vars[j]] == aas[j]) & (f < 0)
f[mask] = bbs[j]
# fill remaining with core truth table
f[f < 0] = np.asarray(core_function, dtype=np.int8)
return BooleanFunction._from_f_unchecked(f)
[docs]
def random_k_canalizing_function_with_specific_layer_structure(
n: int,
layer_structure: list,
exact_depth: bool = False,
*,
rng=None,
) -> BooleanFunction:
"""
Generate a random Boolean function with a specified canalizing layer structure.
The canalizing layer structure is given as a list
``[k_1, ..., k_r]``, where each ``k_i`` specifies the number of
canalizing variables in the i-th layer. The total canalizing depth is
``sum(layer_structure)``.
If ``sum(layer_structure) == n`` and ``n > 1``, the function is a nested
canalizing function and the final layer is required to have size at
least 2.
Parameters
----------
n : int
Total number of Boolean variables.
layer_structure : list of int
Canalizing layer structure ``[k_1, ..., k_r]``. Each entry must be
at least 1. If ``sum(layer_structure) == n`` and ``n > 1``, the final
entry must satisfy ``layer_structure[-1] >= 2``.
exact_depth : bool, optional
If True, enforce that the canalizing depth is exactly
``sum(layer_structure)``. If False (default), additional canalizing
variables may occur in the core function.
rng : int, numpy.random.Generator, numpy.random.RandomState, random.Random, or None, optional
Random number generator or seed specification. Passed to
``utils._coerce_rng``.
Returns
-------
BooleanFunction
A Boolean function on ``n`` variables with the prescribed canalizing
layer structure, plus potentially more canalizing variables (if exact_depth=True).
Raises
------
AssertionError
If ``n`` is not a positive integer.
AssertionError
If ``sum(layer_structure)`` does not satisfy ``0 <= sum(layer_structure) <= n``.
AssertionError
If ``exact_depth=True`` and ``sum(layer_structure) = n - 1``.
AssertionError
If ``sum(layer_structure) = n > 1`` and the final layer has size less
than 2.
AssertionError
If any entry of ``layer_structure`` is less than 1.
Notes
-----
For fixed parameter values, this function samples uniformly at random
from the ensemble of Boolean functions consistent with the specified
canalizing layer structure and additional constraints.
The construction follows the standard decomposition of a canalizing
function into ordered canalizing layers and a residual core function on
the remaining variables.
References
----------
He, Q., and Macauley, M. (2016).
Stratification and enumeration of Boolean functions by canalizing depth.
Physica D: Nonlinear Phenomena, 314, 1–8.
Kadelka, C., Kuipers, J., and Laubenbacher, R. (2017).
The influence of canalization on the robustness of Boolean networks.
Physica D: Nonlinear Phenomena, 353, 39–47.
"""
rng = utils._coerce_rng(rng)
depth = sum(layer_structure) # canalizing depth
if depth == 0:
layer_structure = [0]
assert isinstance(n, (int, np.integer)) and n > 0, "n must be an integer > 0"
assert n - depth != 1 or not exact_depth, (
"There are no functions of exact canalizing depth n-1.\nEither set exact_depth=False or ensure depth=sum(layer_structure)!=n-1."
)
assert 0 <= depth and depth <= n, "Ensure 0 <= depth = sum(layer_structure) <= n."
assert depth < n or layer_structure[-1] > 1 or n == 1, (
"The last layer of an NCF (i.e., an n-canalizing function) has to have size >= 2 whenever n > 1.\nIf depth=sum(layer_structure)=n, ensure that layer_structure[-1]>=2."
)
assert min(layer_structure) >= 1, (
"Each layer must have at least one variable (each element of layer_structure must be >= 1)."
)
size_state_space = 2**n
aas = rng.integers(2, size=depth) # canalizing inputs
b0 = rng.integers(2)
bbs = [b0] * layer_structure[0] # canalized outputs for first layer
for i in range(1, len(layer_structure)):
if i % 2 == 0:
bbs.extend([b0] * layer_structure[i])
else:
bbs.extend([1 - b0] * layer_structure[i])
can_vars = rng.choice(n, depth, replace=False)
f = np.zeros(size_state_space, dtype=int)
if depth < n:
core_function = random_k_canalizing_function(
n=n - depth,
k=0,
exact_depth=exact_depth,
rng=rng,
)
else:
core_function = [1 - bbs[-1]]
left_side_of_truth_table = utils.get_left_side_of_truth_table(n)
f = np.full(2**n, -1, dtype=np.int8)
for j in range(depth):
mask = (left_side_of_truth_table[:, can_vars[j]] == aas[j]) & (f < 0)
f[mask] = bbs[j]
# fill remaining with core truth table
f[f < 0] = np.asarray(core_function, dtype=np.int8)
return BooleanFunction._from_f_unchecked(f)
[docs]
def random_NCF(
n: int,
uniform_over_functions: bool = True,
layer_structure: list | None = None,
*,
rng=None
) -> BooleanFunction:
"""
Generate a random nested canalizing Boolean function in n variables.
A nested canalizing function (NCF) is an n-canalizing Boolean function,
i.e., a function whose canalizing depth equals the number of variables.
Optionally, a specific canalizing layer structure may be prescribed.
Parameters
----------
n : int
Total number of Boolean variables.
uniform_over_functions : bool, optional
If True (default) and ``layer_structure`` is None, canalizing layer
structures are sampled uniformly at random, removing the bias toward
symmetric structures induced by independent sampling of canalized
outputs. If False, canalized outputs are sampled independently and
uniformly as a bitstring, which biases the distribution toward more
symmetric layer structures.
This parameter is ignored if ``layer_structure`` is provided.
layer_structure : list of int or None, optional
Canalizing layer structure ``[k_1, ..., k_r]``. Each entry must be at
least 1. If provided, it must satisfy ``sum(layer_structure) == n``.
If ``n > 1``, the final entry must satisfy
``layer_structure[-1] >= 2``. If None (default), the layer structure
is sampled at random.
rng : int, numpy.random.Generator, numpy.random.RandomState, random.Random, or None, optional
Random number generator or seed specification. Passed to
``utils._coerce_rng``.
Returns
-------
BooleanFunction
A nested canalizing Boolean function on ``n`` variables.
Raises
------
AssertionError
If ``n`` is not a positive integer.
AssertionError
If ``layer_structure`` is provided but does not satisfy
``sum(layer_structure) == n``.
AssertionError
If ``n > 1`` and the final layer has size less than 2.
Notes
-----
For fixed parameter values, this function samples uniformly at random
from the ensemble of nested canalizing Boolean functions consistent with
the specified constraints. Non-uniformity arises only when
``uniform_over_functions=False``.
This function is a convenience wrapper around
``random_k_canalizing_function`` and
``random_k_canalizing_function_with_specific_layer_structure``.
References
----------
He, Q., and Macauley, M. (2016).
Stratification and enumeration of Boolean functions by canalizing depth.
Physica D: Nonlinear Phenomena, 314, 1–8.
Kadelka, C., Kuipers, J., and Laubenbacher, R. (2017).
The influence of canalization on the robustness of Boolean networks.
Physica D: Nonlinear Phenomena, 353, 39–47.
"""
rng = utils._coerce_rng(rng)
if layer_structure is None:
return random_k_canalizing_function(n,
n,
uniform_over_functions=uniform_over_functions,
exact_depth=False,
rng=rng)
else:
assert sum(layer_structure) == n, "Ensure sum(layer_structure) == n."
assert layer_structure[-1] > 1 or n == 1, (
"The last layer of an NCF has to have size >= 2 whenever n > 1.\nEnsure that layer_structure[-1]>=2."
)
return random_k_canalizing_function_with_specific_layer_structure(
n,
layer_structure,
exact_depth=False,
rng=rng
)