import numpy as np
import scipy.stats
from scipy import ndimage
from scipy.optimize import curve_fit
from imutils import nan_to_zero
# try to use cv2 for faster image processing
try:
import cv2
cv2.connectedComponents # relatively recent addition, so check presence
opencv_found = True
except (ImportError, AttributeError):
opencv_found = False
[docs]def measure_of_chaos(im, nlevels, overwrite=True, statistic=None):
"""
Compute a measure for the spatial chaos in given image using the level sets method.
:param im: 2d array
:param nlevels: how many levels to use
:type nlevels: int
:param overwrite: Whether the input image can be overwritten to save memory
:type overwrite: bool
:param statistic: callable that calculates a score (a number) for the object counts in the level sets. If
specified, this statistic will be used instead of the default one. The callable must take two arguments - the
object counts (sequence of ints) and the number of non-zero pixels in the original image (int) - and output a number
:return: the measured value
:rtype: float
:raises ValueError: if nlevels <= 0 or q_val is an invalid percentile or an unknown interp value is used
"""
statistic = statistic or _default_measure
# don't process empty images
if np.sum(im) == 0:
return np.nan
sum_notnull = np.sum(im > 0)
# reject very sparse images
if sum_notnull < 4:
return np.nan
if not overwrite:
# don't modify original image, make a copy
im = im.copy()
notnull_mask = nan_to_zero(im)
im_clean = im / np.max(im) # normalize to 1
# Level Sets Calculation
object_counts = _level_sets(im_clean, nlevels)
return statistic(object_counts, sum_notnull)
[docs]def measure_of_chaos_fit(im, nlevels, overwrite=True):
"""
This function is identical to measure_of_chaos except that it uses a different statistic.
"""
return measure_of_chaos(im, nlevels, overwrite=overwrite, statistic=_fit)
def _dilation_and_erosion(im, dilate_mask=None, erode_mask=None):
dilate_mask = dilate_mask or [[0, 1, 0], [1, 1, 1], [0, 1, 0]]
erode_mask = erode_mask or [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
if opencv_found:
im = np.asarray(im, dtype=np.uint8)
im = cv2.dilate(im, np.asarray(dilate_mask, dtype=np.uint8))
im = cv2.erode(im, np.asarray(erode_mask, dtype=np.uint8))
return im
return ndimage.binary_erosion(ndimage.morphology.binary_dilation(im, structure=dilate_mask), structure=erode_mask)
def _level_sets(im_clean, nlevels, prep=_dilation_and_erosion):
"""
Divide the image into level sets and count the number of objects in each of them.
:param im_clean: 2d array with :code:`im_clean.max() == 1`
:param int nlevels: number of levels to search for objects (positive integer)
:param prep: callable that takes a 2d array as its only argument and returns a 2d array
:return: sequence with the number of objects in each respective level
"""
if nlevels <= 0:
raise ValueError("nlevels must be positive")
prep = prep or (lambda x: x) # if no preprocessing should be done, use the identity function
# TODO change the levels. Reason:
# - in the for loop, the > operator is used. The highest level is 1, therefore the highest level set will always
# be empty. The ndimage.label function then returns 1 as the number of objects in the empty image, although it
# should be zero.
# Proposed solution:
# levels = np.linspace(0, 1, nlevels + 2)[1:-1]
# That is, create nlevels + 2 levels, then throw away the zero level and the one level
# or:
# levels = np.linspace(0, 1, nlevels)[1:-1]
# That is, only use nlevels - 2 levels. This means that the output array will have a size of nlevels - 2
levels = np.linspace(0, 1, nlevels) # np.amin(im), np.amax(im)
# Go through levels and calculate number of objects
num_objs = []
count_func = (lambda im: cv2.connectedComponents(im, connectivity=4)[0] - 1) if opencv_found else (lambda im: ndimage.label(im)[1])
for lev in levels:
# Threshold at level
bw = (im_clean > lev)
bw = prep(bw)
# Record objects at this level
num_objs.append(count_func(bw))
return num_objs
def _default_measure(num_objs, sum_notnull):
"""
Calculate a statistic for the object counts.
:param num_objs: number of objects found in each level, respectively
:param float sum_notnull: sum of all non-zero elements in the original array (positive number)
:return: the calculated value between 0 and 1, bigger is better
"""
num_objs = np.asarray(num_objs, dtype=np.int_)
nlevels = len(num_objs)
if sum_notnull <= 0:
raise ValueError("sum_notnull must be positive")
if min(num_objs) < 0:
raise ValueError("cannot have negative object counts")
if nlevels < 1:
raise ValueError("array of object counts is empty")
if np.unique(num_objs).shape[0] <= 1:
return np.nan
sum_vals = float(np.sum(num_objs))
return 1 - sum_vals / (sum_notnull * nlevels)
# this updates the scoring function from the main algorithm.
def _fit(num_objs, _):
"""
An alternative statistic for measure_of_chaos.
:param num_objs: number of objects found in each level, respectively
:param _: unused dummy parameter, kept for signature compatibility with _default_measure
:return: the calculated value
"""
num_objs = np.asarray(num_objs, dtype=np.int_)
nlevels = len(num_objs)
if min(num_objs) < 0:
raise ValueError("must have at least one object in each level")
if nlevels < 1:
raise ValueError("array of object counts is empty")
if np.unique(num_objs).shape[0] < 2:
return np.nan
def func(x, a, b):
return scipy.stats.norm.cdf(x, loc=a, scale=b)
# measure_value, im, levels, num_objs = measure_of_chaos(im, nlevels)
# if measure_value == np.nan: # if basic algorithm failed then we're going to fail here too
# return np.nan
cdf_curve = np.cumsum(num_objs) / float(np.sum(num_objs))
popt, pcov = curve_fit(func, np.linspace(0, 1, nlevels), cdf_curve, p0=(0.5, 0.05))
pdf_fitted = func(np.linspace(0, 1, nlevels), popt[0], popt[1])
# return 1-np.sqrt(np.sum((pdf_fitted - cdf_curve)**2))
return 1 - np.sum(np.abs((pdf_fitted - cdf_curve)))
[docs]def isotope_pattern_match(images_flat, theor_iso_intensities):
"""
This function calculates a match between a list of isotope ion images and a theoretical intensity vector.
:param images_flat: 2d array (or sequence of 1d arrays) of pixel intensities with shape (d1, d2) where d1 is the number of images and d2 is the number of pixels per image, i.e. :code:`images_flat[i]` is the i-th flattened image
:param theor_iso_intensities: 1d array (or sequence) of theoretical isotope intensities with shape d1, i.e :code:`theor_iso_intensities[i]` is the theoretical isotope intensity corresponding to the i-th image
:return: measure value between 0 and 1, bigger is better
:rtype: float
:raise TypeError: if images are not 1d
:raise ValueError: if images are not equally shaped or if the number of images and the number of intensities differ
"""
d1 = len(images_flat)
if d1 != len(theor_iso_intensities):
raise ValueError("amount of images and theoretical intensities must be equal")
if any(np.shape(im) != np.shape(images_flat[0]) for im in images_flat):
raise ValueError("images are not equally sized")
if any(len(np.shape(im)) != 1 for im in images_flat):
raise TypeError("images are not 1d")
if any(intensity < 0 for intensity in theor_iso_intensities):
raise ValueError("intensities must be >= 0")
image_ints = []
not_null = images_flat[0] > 0
for ii, _ in enumerate(theor_iso_intensities):
image_ints.append(np.sum(images_flat[ii][not_null]))
pattern_match = 1 - np.mean(abs(theor_iso_intensities / np.linalg.norm(theor_iso_intensities) -
image_ints / np.linalg.norm(image_ints)))
if pattern_match == 1.:
return 0
return pattern_match
[docs]def isotope_image_correlation(images_flat, weights=None):
"""
Function for calculating a weighted average measure of image correlation with the principle image.
:param images_flat: 2d array (or sequence of 1d arrays) of pixel intensities with shape (d1, d2) where d1 is the number of images and d2 is the number of pixels per image, i.e. :code:`images_flat[i]` is the i-th flattened image
:param weights: 1d array (or sequence) of weights with shape (d1 - 1), i.e :code:`weights[i]` is the weight to put on the correlation between the first and the i-th image. If omitted, all correlations are weighted equally
:return: measure_value (zero if less than 2 images are given)
:raise TypeError: if images are not 1d
:raise ValueError: if images are not equally shaped or if the number of correlations and the number of weights (if given) differ
"""
if len(images_flat) < 2:
return 0
if any(len(np.shape(im)) != 1 for im in images_flat):
raise TypeError("images are not 1d")
else:
# slightly faster to compute all correlations and pull the elements needed
iso_correlation = np.corrcoef(images_flat)[1:, 0]
# when all values are the same (e.g. zeros) then correlation is undefined
iso_correlation[np.isinf(iso_correlation) | np.isnan(iso_correlation)] = 0
# coerce between [0 1]
iso_correlation=np.clip(iso_correlation,0,1)
try:
return np.average(iso_correlation, weights=weights)
except TypeError:
raise ValueError("Number of images is not equal to the number of weights + 1")