Source code for ristretto.sketch.transforms

"""
Functions for approximating the range of a matrix A.
"""
from __future__ import division
from math import log

import numpy as np
from scipy import fftpack
from sklearn.utils import check_random_state
from sklearn.utils.extmath import safe_sparse_dot

from . import _sketches


[docs]def randomized_uniform_sampling(A, l, axis=1, random_state=None):
    """Uniform randomized sampling transform.

    Given an m x n matrix A, and an integer l, this returns an m x l
    random subset of the range of A.

    """
    random_state = check_random_state(random_state)
    A = np.asarray(A)

    # sample l rows/columns with equal probability
    idx = _sketches.random_axis_sample(A, l, axis, random_state)

    return np.take(A, idx, axis=axis)


[docs]def johnson_lindenstrauss(A, l, axis=1, random_state=None):
    """

    Given an m x n matrix A, and an integer l, this scheme computes an m x l
    orthonormal matrix Q whose range approximates the range of A

    """
    random_state = check_random_state(random_state)

    A = np.asarray(A)
    if A.ndim != 2:
        raise ValueError('A must be a 2D array, not %dD' % A.ndim)

    if axis not in (0, 1):
        raise ValueError('If supplied, axis must be in (0, 1)')

    # construct gaussian random matrix
    Omega = _sketches.random_gaussian_map(A, l, axis, random_state)

    # project A onto Omega
    if axis == 0:
        return Omega.T.dot(A)
    return A.dot(Omega)


[docs]def sparse_johnson_lindenstrauss(A, l, density=None, axis=1, random_state=None):
    """

    Given an m x n matrix A, and an integer l, this scheme computes an m x l
    orthonormal matrix Q whose range approximates the range of A

    Parameters
    ----------
    density : sparse matrix density

    """
    random_state = check_random_state(random_state)

    A = np.asarray(A)
    if A.ndim != 2:
        raise ValueError('A must be a 2D array, not %dD' % A.ndim)

    if axis not in (0, 1):
        raise ValueError('If supplied, axis must be in (0, 1)')

    if density is None:
        density = log(A.shape[0]) / A.shape[0]

    # construct sparse sketch
    Omega = _sketches.sparse_random_map(A, l, axis, density, random_state)

    # project A onto Omega
    if axis == 0:
        return safe_sparse_dot(Omega.T, A)
    return safe_sparse_dot(A, Omega)


[docs]def fast_johnson_lindenstrauss(A, l, axis=1, random_state=None):
    """

    Given an m x n matrix A, and an integer l, this scheme computes an m x l
    orthonormal matrix Q whose range approximates the range of A

    """
    random_state = check_random_state(random_state)

    A = np.asarray_chkfinite(A)
    if A.ndim != 2:
        raise ValueError('A must be a 2D array, not %dD' % A.ndim)

    if axis not in (0, 1):
        raise ValueError('If supplied, axis must be in (0, 1)')

    # TODO: Find name for sketch and put in _sketches
    # construct gaussian random matrix
    diag = random_state.choice((-1, 1), size=A.shape[axis]).astype(A.dtype)

    if axis == 0:
        diag = diag[:, np.newaxis]

    # discrete fourier transform of AD (or DA)
    FDA = fftpack.dct(A * diag, axis=axis, norm='ortho')

    # randomly sample axis
    return randomized_uniform_sampling(
        FDA, l, axis=axis, random_state=random_state)