# Source code for biSBM.utils

""" Utilities for network data manipulation and entropy computation. """
import heapq
from .int_part import *
from numba import njit
from scipy.sparse import lil_matrix, coo_matrix
from scipy.special import comb
from itertools import product, combinations
from loky import get_reusable_executor

[docs]def db_factorial_ln(val):
m = int(val)
if m & 0x1 == 1:  # m is odd
return gammaln(m + 1) - gammaln((m - 1) / 2 + 1) - ((m - 1) / 2) * np.log(2)
else:
return gammaln(m / 2 + 1) + (m / 2) * np.log(2)

# #################
# ENTROPY FUNCTIONS
# #################

# @njit(cache=True)
[docs]def partition_entropy(ka=None, kb=None, k=None, na=None, nb=None, n=None, nr=None, allow_empty=False):
"""partition_entropy

Compute the partition entropy, P(b), for the current partition. It has several variations depending on the priors
used. In the crudest way (compute_profile_likelihood_from_e_rsnr == None), we formulate P(b) = P(b | B) * P(B).
Or, by a two-level Bayesian hierarchy,
we can do P(b) = P(b | n) * P(n | B) * P(B).

Parameters
----------
ka : int
Number of communities in type-*a*.

kb : int
Number of communities in type-*b*.

k : int
Number of communities in the graph.

na : int
Number of vertices in type-*a*.

nb : int
Number of vertices in type-*b*.

n : int
Number of vertices in the graph.

nr : :class:numpy.ndarray
Vertex property array of the block graph which contains the block sizes.

allow_empty : bool (optional, default: False)
If True, partition description length computed will allow for empty groups.

Returns
-------
ent : float
The description length (or entropy) in nat of the partition.

"""
if type(n) is int:
n = np.array([n])
k = np.array([k])
elif type(na) is int and type(nb) is int:
na = np.array([na])
ka = np.array([ka])
nb = np.array([nb])
kb = np.array([kb])

if nr is None:
ent = n * np.log(k) + np.log1p(-(1 - 1. / k) ** n)  # TODO: check this term
else:
if ka is None and kb is None and k is not None:
if allow_empty:
ent = lbinom(k + n - 1, n)
else:
ent = lbinom(n - 1, k - 1)
ent += (gammaln(n + 1) - gammaln(nr + 1).sum()) + np.log(n)  # TODO: check the last term (should be alright)
elif ka is not None and kb is not None and k is None:
if allow_empty:
# TODO
raise NotImplementedError
else:
ent = lbinom(na - 1, ka - 1) + lbinom(nb - 1, kb - 1)
ent += (gammaln(na + 1) + gammaln(nb + 1) - gammaln(nr + 1).sum()) + np.log(na) + np.log(nb)
else:
raise AttributeError
return ent

Calculate the entropy (a.k.a. negative log-likelihood) associated with the current block partition. It does not
include the model entropy.

Parameters
----------
edgelist : :class:numpy.ndarray

mb : :class:numpy.ndarray
Partition :math:b of nodes into blocks.

exact : bool

multigraph : bool

Returns
-------
ent : float
The description length (or entropy) in nat of the fitting.
"""
ent = 0.
e_rs = np.zeros((max(mb) + 1, max(mb) + 1), dtype=np.int_)
m_ij = lil_matrix((len(mb), len(mb)), dtype=np.int_)

for i in edgelist:
# Please do check the index convention of the edgelist
source_group = int(mb[int(i[0])])
target_group = int(mb[int(i[1])])
e_rs[source_group][target_group] += 1
e_rs[target_group][source_group] += 1
m_ij[int(i[0]), int(i[1])] += 1  # we only update the upper triangular part of the adj-matrix
italic_i = 0.
e_r = np.sum(e_rs, axis=1, dtype=np.int_)
sum_m_ii = 0.
sum_m_ij = 0.
sum_e_rs = 0.
sum_e_rr = 0.
sum_e_r = 0.
if exact:
if multigraph:
ind_i, ind_j = m_ij.nonzero()
for ind in zip(ind_i, ind_j):
val = m_ij[ind[0], ind[1]]
if val > 1:
if ind[0] == ind[1]:
sum_m_ii += db_factorial_ln(val)
else:
sum_m_ij += gammaln(val + 1)
for _e_r in e_r:
sum_e_r += gammaln(_e_r + 1)

for ind, e_val in enumerate(np.nditer(e_rs)):
ind_i = int(np.floor(ind / (e_rs.shape[0])))
ind_j = ind % (e_rs.shape[0])
if exact:
if ind_j > ind_i:
sum_e_rs += gammaln(e_val + 1)
elif ind_j == ind_i:
sum_e_rr += db_factorial_ln(e_val)
else:
if e_val != 0.0:
italic_i += e_val * np.log(
e_val / e_r[ind_i] / e_r[ind_j]
)

ent += -italic_i / 2
n_k = assemble_n_k_from_edgelist(edgelist, mb)

ent_deg = 0
for deg, k in enumerate(n_k):
if deg != 0 and k != 0:
ent_deg -= k * gammaln(deg + 1)

ent += ent_deg
if exact:
return ent_deg - sum_e_rs - sum_e_rr + sum_e_r + sum_m_ii + sum_m_ij
else:
num_edges = len(edgelist)
ent += -num_edges
return ent

[docs]def model_entropy(e, ka=None, kb=None, na=None, nb=None, nr=None, allow_empty=False, is_bipartite=True):
"""model_entropy

Computes the amount of information necessary for the parameters of the (bipartite) blockmodel ensemble,
for ka type-a blocks, kb type-b blocks, na type-a vertices,
nb vertices, e edges, and either bipartite or general as a prior. This includes the entropy from
modeling the edge counts and the partition.

Note that if we know a priori that the network is bipartite, we can further compress the model.

Parameters
----------
e : int
Number of edges.

ka : int
Number of communities in type-*a*.

kb : int
Number of communities in type-*b*.

na : int
Number of vertices in type-*a*.

nb : int
Number of vertices in type-*b*.

nr : :class:numpy.ndarray
Vertex property array of the block graph which contains the block sizes.

allow_empty : bool (optional, default: False)
If True, partition description length computed will allow for empty groups.

is_bipartite : bool (optional, default: True)
If False, edge counts description length computed will assume a purely flat :math:e_{rs}.

Returns
-------
dl : float
The description length (or entropy) in nat of the model.

References
----------

.. [peixoto-parsimonious-2013] Tiago P. Peixoto, "Parsimonious module
inference in large networks", Phys. Rev. Lett. 110, 148701 (2013),
:doi:10.1103/PhysRevLett.110.148701, :arxiv:1212.4794.
.. [peixoto-nonparametric-2017] Tiago P. Peixoto, "Nonparametric
Bayesian inference of the microcanonical stochastic block model",
Phys. Rev. E 95 012317 (2017), :doi:10.1103/PhysRevE.95.012317,
:arxiv:1610.02703

"""
if not is_bipartite:
k = ka + kb
x = (k * (k + 1)) / 2
else:
x = ka * kb

if nr is False:
dl = lbinom(x + e - 1, e)
else:
dl = lbinom(x + e - 1, e) + partition_entropy(ka=ka, kb=kb, na=na, nb=nb, nr=nr, allow_empty=allow_empty)
# TO use the general prior for the partition entropy, replace the function with:
# n = na + nb
# partition_entropy(k=k, n=n, nr=nr, allow_empty=allow_empty)
return dl

[docs]def degree_entropy(edgelist, mb, __q_cache=np.array([], ndmin=2), degree_dl_kind="distributed",
q_cache_max_e_r=int(1e4)):
"""degree_entropy

degree_entropy

Parameters
----------
edgelist : iterable or :class:numpy.ndarray

mb : iterable or :class:numpy.ndarray
Partition :math:b of nodes into blocks.

__q_cache : :class:numpy.ndarray (required, default: np.array([], ndmin=2))

degree_dl_kind: str

1. degree_dl_kind == "uniform"

This corresponds to a non-informative prior, where the node
degrees are sampled from an uniform distribution.

2. degree_dl_kind == "distributed"

This option should be preferred in most cases.

Returns
-------
ent : float
The entropy.

"""
ent = 0
n_r = assemble_n_r_from_mb(mb)

e_r = np.zeros(max(mb) + 1)
for i in edgelist:
# Please do check the index convention of the edgelist
source_group = int(mb[int(i[0])])
target_group = int(mb[int(i[1])])
e_r[source_group] += 1
e_r[target_group] += 1

if degree_dl_kind == "uniform":
ent += np.sum(lbinom(n_r + e_r - 1, e_r))
elif degree_dl_kind == "distributed":
if len(__q_cache) == 1:
__q_cache = init_q_cache(q_cache_max_e_r, __q_cache)  # the pre-computed lookup table affects precision!
for ind, n_r_ in enumerate(n_r):
ent += log_q(e_r[ind], n_r_, __q_cache)
eta_rk = assemble_eta_rk_from_edgelist_and_mb(edgelist, mb)

for mb_, eta_rk_ in enumerate(eta_rk):
for eta in eta_rk_:
ent -= +gammaln(eta + 1)
ent -= -gammaln(n_r[mb_] + 1)
elif degree_dl_kind == "entropy":
raise NotImplementedError
return ent

[docs]def virtual_moves_ds(ori_e_rs, mlists, ka):
"""virtual_moves_ds

virtual_moves_ds

Parameters
----------
ori_e_rs : :class:numpy.ndarray

mlists : set

ka : int
Number of communities in type-*a*.

Returns
-------
dS : float

_mlist : :class:numpy.ndarray

"""
ori_e_r = np.sum(ori_e_rs, axis=1)
size = ori_e_rs.shape[0] - 1
t = np.inf
dS = 0.
_mlist = np.zeros(2, dtype=np.int_)

for _ in mlists:
mlist = np.int_([_.split("+")[0], _.split("+")[1]])
if (np.max(mlist) >= ka > np.max(mlist)) or (np.max(mlist) == 0 and ka == 1) or (
np.max(mlist) == ka and ori_e_rs.shape[0] == 1 + ka):
continue
else:
if np.max(mlist) < ka:  # we are merging groups of type-a
_1, _2 = ori_e_rs[[mlist[0], mlist[1]], ka + 1: size + 1]
else:
_1, _2 = ori_e_rs[[mlist[0], mlist[1]], 0: ka]
_ds = 0
_ds -= np.sum(gammaln(_1 + _2 + 1))
_ds += np.sum(gammaln(_1 + 1))
_ds += np.sum(gammaln(_2 + 1))

_3, _4 = ori_e_r[[mlist[0], mlist[1]]]
_ds += gammaln(_3 + _4 + 1)
_ds -= gammaln(_3 + 1) + gammaln(_4 + 1)

if 0 <= _ds < t:
t = _ds
dS, _mlist = _ds, mlist

return dS, _mlist

# ####################
# GENERATION FUNCTIONS
# ####################

[docs]def gen_equal_partition(n, total):
"""gen_equal_partition

gen_equal_partition

Parameters
----------
n : int

total : int

Returns
-------
n_blocks : list

"""
all_nodes = np.arange(total)
n_blocks = list(map(len, np.array_split(all_nodes, n)))

return n_blocks

[docs]def gen_unequal_partition(n, total, avg_deg, alpha):
"""gen_unequal_partition

Parameters
----------
n : int
Number of communities.

total : int
Number of nodes.

avg_deg : float
Average degree of the network.

alpha : float
The parameter of the Dirichlet distribution (the smaller the unevener the distribution is).
We usually use alpha=1 to generate a case and
filter it out if the lowest size of a community is less than (2 * total / avg_deg) ** 0.5 (resolution limit).

Returns
-------
a : list
The sizes of communities to sum to total.

_ : float
The ratio of largest-sized community to the lowest-sized one.

"""
cutoff = (2 * total / avg_deg) ** 0.5
d = np.random.dirichlet([alpha] * n, 1)[0]

while not np.all(d * total > cutoff):
d = np.random.dirichlet([alpha] * n, 1)[0]

a = list(map(int, d * total))
remain_a = int(total - sum(a))
for idx, _ in enumerate(range(remain_a)):
a[idx] += 1

return a, max(a) / min(a)

[docs]def gen_e_rs(b, n_edges, p=0):
"""gen_e_rs

Parameters
----------
b : int
Number of communities within each type. (suppose Ka = Kb)

n_edges : int
Number of edges planted in the system.

p : float
Edge propensity between groups; i.e., the ratio :math:c_{out} / c_{in}.

Returns
-------
e_rs : :class:numpy.ndarray
Edge counts matrix.

"""
c = n_edges / (b + (b ** 2 - b) * p)
c_in = c
c_out = c * p
e_rs = np.zeros((b * 2, b * 2), dtype=np.int_)

perm = product(range(0, b), range(b, b * 2))
idx_in = np.linspace(0, b ** 2 - 1, b, dtype=np.int_)
for idx, p in enumerate(perm):
i = p[0]
j = p[1]
if idx in idx_in:
e_rs[i][j] = c_in
else:
e_rs[i][j] = c_out
e_rs[j][i] = e_rs[i][j]
return e_rs

[docs]def gen_e_rs_harder(ka, kb, n_edges, samples=1, top_k=1):
"""gen_e_rs_harder

Parameters
----------
ka : int
Number of communities within type-*a*.

kb : int
Number of communities within type-*b*.

n_edges : int
Number of edges planted in the system.

samples : int
Number of random draws made on :math:e_{rs}.

top_k : int
Number of samples selected. These are top-k samples with higher profile likelihood.

Returns
-------
e_rs : :class:numpy.ndarray or list[numpy.ndarray] (when top_k > 1)
Edge counts matrix.

"""
if top_k <= 0:
raise ValueError("Argument top_k needs to be a positive integer.")
e_rs_inst = []
for _ in range(int(samples)):
c = np.int_(np.random.dirichlet([1] * ka * kb, 1)[0] * n_edges)
e_rs = np.zeros((ka + kb, ka + kb), dtype=np.int_)
remain_c = n_edges - np.sum(c, dtype=np.int_)
for idx, _ in enumerate(range(remain_c)):
c[idx] += 1
perm = product(range(0, ka), range(ka, ka + kb))
for idx, p in enumerate(perm):
i = p[0]
j = p[1]
e_rs[i][j] = c[idx]
e_rs[j][i] = e_rs[i][j]
heapq.heappush(e_rs_inst, (- compute_profile_likelihood_from_e_rs(e_rs), e_rs))
e_rs = heapq.heappop(e_rs_inst)[1]
if samples == 1 or top_k == 1:
return e_rs
else:
e_rs = []
for _ in range(top_k):
e_rs += [heapq.heappop(e_rs_inst)[1]]
return e_rs

[docs]def gen_e_rs_hard(ka, kb, n_edges, p=0):
"""gen_e_rs_hard

Parameters
----------
ka : int
Mumber of communities within type-*a*.

kb : int
Number of communities within type-*b*.

n_edges : int
Number of edges planted in the system.

p : float
Edge propensity between groups; i.e., the ratio :math:c_{out} / c_{in}.

Returns
-------
e_rs : :class:numpy.ndarray
Edge counts matrix.

"""
k_max = max(ka, kb)
k_min = min(ka, kb)
if k_max > 2 ** k_min - 1:
raise NotImplementedError
else:
blocks = 0
k_min_ = 1
_cum_comb = 0
cum_comb = comb(k_min, 1)
nonzero_indices = []
for i in range(1, k_max + 1):
if i > cum_comb:
k_min_ += 1
_cum_comb = int(cum_comb)
cum_comb += comb(k_min, k_min_)

blocks += k_min_
for __i in list(combinations(range(k_min), k_min_))[i - _cum_comb - 1]:
nonzero_indices += [(__i, i - 1 + k_min)]

c = n_edges / (blocks + (ka * kb - blocks) * p)
c_in = c
c_out = c * p

e_rs = np.zeros((ka + kb, ka + kb), dtype=np.int_)
perm = product(range(0, ka), range(ka, ka + kb))
for _, p in enumerate(perm):
i = p[0]
j = p[1]
if (i, j) in nonzero_indices:
e_rs[i][j] = c_in
else:
e_rs[i][j] = c_out
e_rs[j][i] = e_rs[i][j]
return e_rs

[docs]def gen_equal_bipartite_partition(na, nb, ka, kb):
"""gen_equal_bipartite_partition

Parameters
----------
na : int
Number of nodes in type-*a*.

nb : int
Number of nodes in type-*b*.

ka : int
Number of communities in type-*a*.

kb : int
Number of communities in type-*b*.

Returns
-------
n : :class:numpy.ndarray

"""
n_blocks = map(int, gen_equal_partition(ka, na) + gen_equal_partition(kb, nb))
n = []
for idx, i in enumerate(n_blocks):
n += [idx] * i
return np.array(n, dtype=np.int_)

[docs]@njit(cache=True)
def gen_bicliques_edgelist(b, num_nodes):
"""Generate an array of edgelist and node-type mapping for a group of bi-cliques.

Parameters
----------
b : int
Number of bi-cliques.
num_nodes : int
Number of nodes (size) for each bi-clique.

Returns
-------
el : :class:numpy.ndarray
The edgelist of a group of bi-cliques.

types : :class:numpy.ndarray
The types-array that maps each node id to a bipartite type (0 or 1).

"""
total_num_nodes = b * num_nodes
types = np.zeros(total_num_nodes, dtype=np.int_)
num_edges_each_clique = int(num_nodes ** 2 / 4)
el = np.zeros((num_edges_each_clique * b, 2), dtype=np.int_)

idx = 0
base = 0
for _b in range(0, b):
for i in range(0, int(num_nodes / 2)):
types[i + base] = 1
for j in range(int(num_nodes / 2), num_nodes):
types[j + base] = 2
el[idx] = [i + base, j + base]
idx += 1
base = (_b + 1) * num_nodes
return el, types

[docs]def assemble_old2new_mapping(types):
"""Create a mapping that map the old node-id's to new ones, such that the types-array is sorted orderly.

Parameters
----------
types : :class:numpy.ndarray

Returns
-------
old2new : dict
Dictionary that maps the old node index to a new one.

new2old : dict
Dictionary that maps the new node index to the old one; i.e., a reverse mapping of old2new.

new_types : :class:numpy.ndarray
The new types-array, which is sorted orderly and directly applicable to :class:det_k_bisbm.OptimalKs.

"""
old2new = dict()
new_types = np.zeros(len(types), dtype=np.int_)

new_id = 0
for _id, t in enumerate(types):
if t == 1:
old2new[_id] = new_id
new_types[new_id] = 1
new_id += 1

for _id, t in enumerate(types):
if t == 2:
old2new[_id] = new_id
new_types[new_id] = 2
new_id += 1

new2old = {value: key for key, value in old2new.items()}
return old2new, new2old, new_types

# ##################
# ASSEMBLE FUNCTIONS
# ##################

[docs]def assemble_edgelist_old2new(edgelist, old2new):
"""Assemble the new edgelist array via an old2new mapping.

Parameters
----------
edgelist : :class:numpy.ndarray

old2new : dict
Dictionary that maps the old node index to a new one.

Returns
-------
el : :class:numpy.ndarray
The new edgelist of a group of bi-cliques (directly pluggable to :class:det_k_bisbm.OptimalKs)

"""
el = np.zeros((len(edgelist), 2), dtype=np.int_)
for _id, _ in enumerate(edgelist):
el[_id][0] = old2new[_[0]]
el[_id][1] = old2new[_[1]]
return el

[docs]@njit(cache=True)
def assemble_mb_new2old(mb, new2old):
"""Assemble the partition that corresponds to the old space of node indices.

Parameters
----------
mb : :class:numpy.ndarray
Partition :math:b of nodes into blocks.

new2old : dict
Dictionary that maps the new node index to the old one; i.e., a reverse mapping of old2new.

Returns
-------
old_mb : :class:numpy.ndarray
The partition that corresponds to the old space of node indices.

"""
old_mb = np.zeros(len(mb), dtype=np.int_)
for _id, _ in enumerate(mb):
old_mb[new2old[_id]] = _
return old_mb

[docs]@njit(cache=True)
def assemble_n_r_from_mb(mb):
"""Get :math:n_r, i.e., the number of nodes in each group, from the partition :math:b.

Parameters
----------
mb : iterable or :class:numpy.ndarray
Partition :math:b of nodes into blocks.

Returns
-------
n_r : :class:numpy.ndarray

"""
n_r = np.zeros(np.max(mb) + 1)
for block_id in mb:
n_r[block_id] += 1
n_r = np.array([int(x) for x in n_r], dtype=np.int_)
return n_r

[docs]@njit(cache=True)
def assemble_n_k_from_edgelist(edgelist, mb):
"""Get :math:n_k, i.e., the number :math:n_k of nodes of degree :math:k.

Parameters
----------
edgelist : :class:numpy.ndarray
List of edge tuples.

mb : :class:numpy.ndarray
Partition :math:b of nodes into blocks.

Returns
-------
n_k : :class:numpy.ndarray
Array of the number of nodes of degree :math:k.

"""
k = np.zeros(len(mb) + 1, dtype=np.int_)
for idx in range(len(edgelist)):
k[edgelist[idx][0]] += 1
k[edgelist[idx][1]] += 1

max_ = np.int_(np.max(k))
n_k = np.zeros(max_ + 1, dtype=np.int_)
for k_ in k:
n_k[k_] += 1
return n_k

[docs]def assemble_e_rs_from_mb(edgelist, mb):
"""Get :math:e_{rs}, i.e., the matrix of edge counts between blocks.

Parameters
----------
edgelist : :class:numpy.ndarray
List of edge tuples.

mb : :class:numpy.ndarray
Partition :math:b of nodes into blocks.

Returns
-------
e_rs : :class:numpy.ndarray
Edge count matrix :math:e_{rs}.

"""
sources, targets = zip(*edgelist)
sources = [mb[node] for node in sources]
targets = [mb[node] for node in targets]
data = np.ones(len(sources + targets), dtype=np.int_)
shape = int(np.max(mb) + 1)
e_rs = coo_matrix((data, (sources + targets, targets + sources)), shape=(shape, shape))

return e_rs.toarray()

[docs]@njit(cache=True)
def assemble_eta_rk_from_edgelist_and_mb(edgelist, mb):
"""Get :math:\eta_{rk}, or the number :math:\eta_{rk} of nodes of degree :math:k that belong to group :math:r.

Parameters
----------
edgelist : :class:numpy.ndarray

mb : :class:numpy.ndarray
Partition :math:b of nodes into blocks.

Returns
-------
eta_rk : :class:numpy.ndarray

"""
mb_max_ = np.int_(np.max(mb))

k = np.zeros((mb_max_ + 1, len(mb)), dtype=np.int_)

for idx in range(len(edgelist)):
k[mb[edgelist[idx][0]]][edgelist[idx][0]] += 1
k[mb[edgelist[idx][1]]][edgelist[idx][1]] += 1

max_array = np.empty(k.shape[0], dtype=np.int_)
for _k_idx in range(len(k)):
max_array[_k_idx] = np.max(k[_k_idx])
max_ = np.max(max_array)

eta_rk = np.zeros((mb_max_ + 1, max_ + 1), dtype=np.int_)
for mb_ in range(mb_max_ + 1):
for node_idx, k_ in enumerate(k[mb_]):
if mb[node_idx] == mb_:
eta_rk[mb_][k_] += 1

return eta_rk

[docs]def compute_profile_likelihood(edgelist, mb, ka=None, kb=None, k=None):
"""compute_profile_likelihood

Parameters
----------
edgelist : :class:numpy.ndarray

mb : :class:numpy.ndarray
Partition :math:b of nodes into blocks.

ka : int
Number of communities in type-*a*.

kb : int
Number of communities in type-*b*.

k : int
Total number of communities.

Returns
-------
italic_i : float

"""
# First, let's compute the m_e_rs from the edgelist and mb
m_e_rs = np.zeros((max(mb) + 1, max(mb) + 1))
for i in edgelist:
# Please do check the index convention of the edgelist
source_group = int(mb[int(i[0])])
target_group = int(mb[int(i[1])])
if ka is not None or kb is not None:
if source_group == target_group:
raise ImportError("This is not a bipartite network!")
m_e_rs[source_group][target_group] += 1
m_e_rs[target_group][source_group] += 1

# then, we compute the profile likelihood from the m_e_rs
italic_i = 0.
m_e_r = np.sum(m_e_rs, axis=1)
num_edges = m_e_r.sum() / 2.
for ind, e_val in enumerate(np.nditer(m_e_rs)):
ind_i = int(np.floor(ind / (m_e_rs.shape[0])))
ind_j = ind % (m_e_rs.shape[0])
if e_val != 0.0:
italic_i += e_val / 2. / num_edges * np.log(
e_val / m_e_r[ind_i] / m_e_r[ind_j] * 2 * num_edges
)
if ka is not None or kb is not None:
assert m_e_rs.shape[0] == ka + kb, "[ERROR] m_e_rs dimension (={}) is not equal to ka (={}) + kb (={})!".format(
m_e_rs.shape[0], ka, kb
)
elif k is not None:
assert m_e_rs.shape[0] == k, "[ERROR] m_e_rs dimension (={}) is not equal to k (={})!".format(
m_e_rs.shape[0], k
)
return italic_i

[docs]@njit(cache=True)
def compute_profile_likelihood_from_e_rs(e_rs):
"""compute_profile_likelihood_from_e_rs

Parameters
----------
e_rs : :class:numpy.ndarray

Returns
-------
italic_i : float

"""
italic_i = 0.
e_r = np.sum(e_rs, axis=1)
num_edges = e_r.sum() / 2.
for ind, e_val in enumerate(np.nditer(e_rs)):
ind_i = np.int_(np.floor(ind / (e_rs.shape[0])))
ind_j = ind % (e_rs.shape[0])
if e_val != 0.0:
italic_i += e_val / 2. / num_edges * np.log(
e_val / e_r[ind_i] / e_r[ind_j] * 2 * num_edges
)
return italic_i

[docs]def get_desc_len_from_data(na, nb, n_edges, ka, kb, edgelist, mb, diff=False, nr=None, allow_empty=False,
degree_dl_kind="distributed", q_cache=np.array([], ndmin=2), is_bipartite=True):
"""Description length difference to a randomized instance

Parameters
----------
na : int
Number of nodes in type-*a*.

nb : int
Number of nodes in type-*b*.

n_edges : int
Number of edges.

ka : int
Number of communities in type-*a*.

kb : int
Number of communities in type-*b*.

edgelist : :class:numpy.ndarray
Edgelist in Python list structure.

mb : :class:numpy.ndarray
Partition :math:b of nodes into blocks.

diff : bool
When diff == True,
the returned description value will be the difference to that of a random bipartite network. Otherwise, it will
return the entropy (a.k.a. negative log-likelihood) associated with the current block partition.

allow_empty : bool

nr : :class:numpy.ndarray

degree_dl_kind : str (optional, default: "distributed")
1. degree_dl_kind == "uniform"
2. degree_dl_kind == "distributed" (default)
3. degree_dl_kind == "entropy"
is_bipartite: bool (default: "True")

Returns
-------
desc_len_b : float
Difference of the description length to the bipartite ER network, per edge.

"""
desc_len = 0.
# finally, we compute the description length
if diff:  # todo: add more options to it; now, only uniform prior for P(e) is included.
italic_i = compute_profile_likelihood(edgelist, mb, ka=ka, kb=kb)
desc_len += (na * np.log(ka) + nb * np.log(kb) - n_edges * (italic_i - np.log(2))) / n_edges
x = float(ka * kb) / n_edges
desc_len += (1 + x) * np.log(1 + x) - x * np.log(x)
desc_len -= (1 + 1 / n_edges) * np.log(1 + 1 / n_edges) - (1 / n_edges) * np.log(1 / n_edges)
else:
desc_len += model_entropy(n_edges, ka=ka, kb=kb, na=na, nb=nb, nr=nr, allow_empty=allow_empty,
is_bipartite=is_bipartite)
desc_len += degree_entropy(edgelist, mb, __q_cache=q_cache, degree_dl_kind=degree_dl_kind)
return desc_len.__float__()

[docs]def get_desc_len_from_data_uni(n, n_edges, k, edgelist, mb):
"""Description length difference to a randomized instance, via PRL 110, 148701 (2013).

Parameters
----------
n : int
Number of nodes.

n_edges : int
Number of edges.

k : int
Number of communities.

edgelist : :class:numpy.ndarray
A list of edges.

mb : :class:numpy.ndarray
Partition :math:b of nodes into blocks.

Returns
-------
desc_len_b : float
Difference of the description length to the ER network, per edge.

"""
italic_i = compute_profile_likelihood(edgelist, mb, k=k)

# finally, we compute the description length
desc_len_b = (n * np.log(k) - n_edges * italic_i) / n_edges
x = float(k * (k + 1)) / 2. / n_edges
desc_len_b += (1 + x) * np.log(1 + x) - x * np.log(x)
desc_len_b -= (1 + 1 / n_edges) * np.log(1 + 1 / n_edges) - (1 / n_edges) * np.log(1 / n_edges)
return desc_len_b

[docs]@njit(cache=True, fastmath=True)
def accept_mb_merge(mb, mlist):
"""accept_mb_merge

Accept partition merge.

Parameters
----------
mb : iterable or :class:numpy.ndarray
The partition to be merged.

mlist : iterable or :class:numpy.ndarray
The two block labels to be merged.

Returns
-------
_mb : :class:numpy.ndarray
The merged partition.

"""
_mb = np.zeros(mb.size, dtype=np.int_)
mlist.sort()
for _node_id, _g in enumerate(mb):
if _g == mlist[1]:
_mb[_node_id] = mlist[0]
elif _g < mlist[1]:
_mb[_node_id] = _g
else:
_mb[_node_id] = _g - 1
return _mb

# ###############
# Parallelization
# ###############

[docs]def loky_executor(max_workers, timeout, func, feeds):
assert type(feeds) is list, "[ERROR] feeds should be a Python list; here it is {}".format(str(type(feeds)))
loky_executor = get_reusable_executor(max_workers=int(max_workers), timeout=int(timeout))
results = loky_executor.map(func, feeds)
return results

# ##########################
# Requires graph-tool to run
# ##########################

[docs]def get_flat_entropies(state):
"""get_flat_entropies

Parameters
----------
state : :class:graph_tool.inference.blockmodel.BlockState
The stochastic block model state of a given graph, as defined in Graph-tool.

Returns
-------
dl : dict
The entropic report of the partition.

"""
na = sum(state.pclabel.a == 0)
dl = dict()
dl["mdl"] = state.entropy()
dl["ka"] = len(set(state.b.a.tolist()[:na]))
dl["kb"] = len(set(state.b.a.tolist()[na:]))
dl["partition"] = state.entropy(partition_dl=1) - state.entropy(partition_dl=0)
dl["degree"] = state.entropy(degree_dl=1) - state.entropy(degree_dl=0)
dl["edges"] = state.entropy(edges_dl=1) - state.entropy(edges_dl=0)
return dl

[docs]def get_nested_entropies(state):
"""get_nested_entropies

Parameters
----------
state : :class:graph_tool.inference.nested_blockmodel.NestedBlockState

Returns
-------
dl : dict

"""
na = sum(state.levels[0].pclabel.a == 0)
dl = dict()
dl["mdl"] = state.entropy()
dl["ka"] = len(set(state.levels[0].b.a.tolist()[:na]))
dl["kb"] = len(set(state.levels[0].b.a.tolist()[na:]))

dl["partition"] = state.levels[0].entropy(partition_dl=1) - state.levels[0].entropy(partition_dl=0)
dl["edges"] = state.levels[0].entropy(edges_dl=1) - state.levels[0].entropy(edges_dl=0)
dl["degree"] = state.levels[0].entropy(degree_dl=1) - state.levels[0].entropy(degree_dl=0)

multigraph_dls = []
for i in range(1, len(state.levels)):
partition = state.levels[i].entropy(adjacency=0, multigraph=1, partition_dl=1, degree_dl=0, edges_dl=0, dense=1)

if i != len(state.levels) - 1:
edges = 0.
else:
edges = state.levels[i].entropy(adjacency=0, multigraph=1, partition_dl=0, degree_dl=0, edges_dl=1, dense=1)
degree = state.levels[i].entropy(adjacency=0, multigraph=1, partition_dl=0, degree_dl=1, edges_dl=0, dense=1)
total = adj + partition + edges + degree
multigraph_dls += [{
"sum": total,