# Source code for cntk.contrib.netopt.factorization

import cntk
from cntk.ops.functions import BlockFunction
from cntk.variables import Parameter
from cntk.ops import times
from cntk.internal import _as_tuple
from cntk.layers.blocks import _initializer_for, _INFERRED, identity
from cntk.layers.blocks import UntestedBranchError  # helpers
from cntk.default_options import is_default_override
from cntk.default_options import get_default_override, default_override_or

def svd_subprojection(matrix, k):
    '''
    Factor a matrix with a singular value decomposition and keep only the
    leading k singular components.

    Args:
        matrix : an input matrix (2-D, as accepted by numpy.linalg.svd).
        k (int): desired rank of the output matrices. Integer-valued
                 numbers (e.g. numpy scalars or floats) are coerced with
                 int().

    Returns:
        a tuple (W1, W2). W1 holds the first k columns of U and W2 holds the
        first k rows of V, each row scaled by its singular value, so that
        dot(W1, W2) is the rank-k approximation of the input matrix.
    '''
    import numpy as np

    # factor_dense casts k with int() when sizing the factored layer, so the
    # slices below accept the same kind of value. None is left untouched so
    # that it keeps slicing to the full rank.
    if k is not None:
        k = int(k)

    # Reduced decomposition: matrix == dot(U * s, V). numpy returns V already
    # transposed, so its rows (not its columns) pair with the singular values.
    U, s, V = np.linalg.svd(matrix, full_matrices=False)

    # The column slice of U is a strided view; make it contiguous before
    # handing it out.
    W1 = np.ascontiguousarray(U[:, :k])

    # Scale row i of V by s[i]. Broadcasting gives the same values as
    # dot(diag(s[:k]), V[:k, :]) without materialising a k x k matrix.
    W2 = s[:k, np.newaxis] * V[:k, :]

    return W1, W2


def factor_dense(model, projection_function=None, filter_function=None,
                 factor_function=None):
    '''
    Reduce the size of a dense model using the provided factor_function
    and the projection_function. filter_function is used to select dense
    layers to apply the reduction. If no factor_function is specified,
    use svd decomposition.

    Args:
        model               : dense model.
        projection_function : determines the new size (rank k) of a dense
                              layer from its weight matrix. It can be based
                              on the shape of the weight matrix or other
                              heuristics. If omitted, k is the smaller of the
                              two weight dimensions (only possible when a
                              factor_function is given).
        filter_function     : optional predicate over a Dense block; only
                              blocks for which it returns a true value are
                              factored.
        factor_function     : factors a weight matrix, called as
                              factor_function(W, k) and expected to return
                              the pair (W1, W2). It can choose to ignore the
                              value k. Defaults to svd_subprojection.

    Returns:
        a model that is factored and reduced in size.

    Raises:
        ValueError: if neither factor_function nor projection_function is
                    given, since the default svd factorization needs k.
    '''
    if factor_function is None and projection_function is None:
        raise ValueError("Dense: default factor function (svd) requires a projection_function.")

    def dense_filter(x):
        # Select plain cntk.Function objects that are 'Dense' blocks; the
        # user-supplied filter is consulted last and only for those.
        return (type(x) is cntk.Function
                and x.op_name == 'Dense'
                and x.is_block
                and (filter_function(x) if filter_function else True))

    def dense_converter(dense_block):
        W, b = dense_block.W.value, dense_block.b.value

        ht, wdth = W.shape
        # k is the rank of the output matrices. If a projection function is
        # provided, then use it, otherwise assign min of two dimensions of
        # W to k.
        k = projection_function(W) if projection_function else min(ht, wdth)
        W1, W2 = factor_function(W, k) if factor_function else svd_subprojection(W, k)

        # dense_factored only accepts plain Python ints in its shapes, hence
        # the explicit int() casts.
        # NOTE(review): activation=None is passed, so an activation applied
        # inside the original Dense block would not be re-applied by the
        # replacement - confirm this is intended.
        # NOTE(review): inputs[2] is presumably the data input of the Dense
        # block (after its W and b parameters) - verify.
        return dense_factored((int(k), int(wdth)),
                              init={'W1': W1, 'W2': W2},
                              activation=None,
                              init_bias=b,
                              name='DenseFactored')(dense_block.inputs[2])

    return cntk.misc.convert(model, dense_filter, dense_converter)


def dense_factored(shapes, #(shape1, shape2)
                  activation=default_override_or(identity),
                  init=None,
                  input_rank=None,
                  map_rank=None,
                  bias=default_override_or(True),
                  init_bias=default_override_or(0),
                  name=''):
    '''
    Perform the new model creation using the factored inputs W1 and W2.
    The returned function represents the new model and computes
    activation(times(times(x, W1), W2) + b).

    Args:
        shapes                  : pair (shape1, shape2) of plain Python ints:
                                  the output dimension of W1 and the output
                                  dimension of W2.
        activation              : activation function used for the model;
                                  None applies no activation.
        init                    : dict with the keys 'W1' and 'W2' holding
                                  the initial values of the two factor
                                  matrices. Defaults to None for both.
        input_rank              : rank of the input tensor. Not supported
                                  yet; must be None.
        map_rank                : not supported yet; must be None.
        bias                    : whether the model has a bias term.
        init_bias               : initial bias value.
        name                    : name of the block function that creates the new model.

    Returns:
        a model that is factored and projected (reduced).

    Raises:
        ValueError: if input_rank or map_rank is given, or if shapes
                    contains anything other than plain Python ints.
    '''

    # matthaip: Not sure how to handle input tensor of rank > 1
    # or selective flattening of ranks.
    # These are explicit raises rather than an assert so the checks are not
    # stripped when Python runs with -O.
    if input_rank is not None and map_rank is not None:
        raise ValueError("Dense: input_rank and map_rank cannot be specified at the same time.")
    if input_rank is not None or map_rank is not None:
        raise ValueError("DenseFactored: input_rank and map_rank are not supported.")
    if not all(isinstance(s, int) for s in shapes):
        raise ValueError("DenseFactored: shapes must contain only plain Python ints.")

    # Resolve the overridable arguments against the defaults registered for
    # cntk.layers.Dense.
    activation = get_default_override(cntk.layers.Dense, activation=activation)
    bias       = get_default_override(cntk.layers.Dense, bias=bias)
    init_bias  = get_default_override(cntk.layers.Dense, init_bias=init_bias)
    # TODO: how to use get_default_override for the init parameter?

    output_shape1 = _as_tuple(shapes[0])
    output_shape2 = _as_tuple(shapes[1])

    # If input_rank not given then pass a single _INFERRED;
    # map_rank if given will determine the input_rank.
    # The dimension inference may still create multiple axes.
    input_shape = _INFERRED

    # A fresh dict per call stands in for the former mutable default argument.
    init_weights = {'W1': None, 'W2': None} if init is None else init

    # parameters bound to this Function
    W1 = Parameter(input_shape + output_shape1, init=init_weights['W1'], name='W1')
    W2 = Parameter(output_shape1 + output_shape2, init=init_weights['W2'], name='W2')
    b = Parameter(output_shape2, init=init_bias,    name='b') if bias else None

    # expression of this function
    @BlockFunction('DenseFactored', name)
    def dense(x):
        r = times(x, W1)
        r = times(r, W2)
        # b is either a Parameter or None; test for None explicitly instead
        # of relying on the truth value of a Parameter.
        if b is not None:
            r = r + b
        if activation is not None:
            r = activation(r)
        return r
    return dense

# Reference for sklearn.tucker.hooi:
# https://hal.inria.fr/hal-01219316/document