Represent code expressions as data structures, then transform them.
Install
pip install vexpr
Vexpr is currently in technical preview and may throw "Not Implemented" errors if you use functionality that is not yet supported.
Get started
Example: a custom distance metric between two lists of vectors, x1 and x2.
NumPy
1. Create a Vexpr
import vexpr as vp
import vexpr.numpy as vnp
import vexpr.scipy.spatial.distance as vsd
w1 = vp.symbol("w1")
w2 = vp.symbol("w2")
x1 = vp.symbol("x1")
x2 = vp.symbol("x2")
expr = vnp.sum([w1 * vsd.cdist(x1[..., [0, 1, 2]], x2[..., [0, 1, 2]]),
                w2 * vsd.cdist(x1[..., [0, 3, 4]], x2[..., [0, 3, 4]])],
               axis=0)
print(expr)
# Output: a Vexpr data structure:
#
# numpy.sum(
#   [operator.mul(
#     symbol('w1'),
#     scipy.spatial.distance.cdist(
#       operator.getitem(
#         symbol('x1'),
#         (Ellipsis, [0, 1, 2]),
#       ),
#       operator.getitem(
#         symbol('x2'),
#         (Ellipsis, [0, 1, 2]),
#       ),
#     ),
#   ),
#   operator.mul(
#     symbol('w2'),
#     scipy.spatial.distance.cdist(
#       operator.getitem(
#         symbol('x1'),
#         (Ellipsis, [0, 3, 4]),
#       ),
#       operator.getitem(
#         symbol('x2'),
#         (Ellipsis, [0, 3, 4]),
#       ),
#     ),
#   )]
#   axis=0
# )
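A Vexpr is plain data, so it can also be evaluated as-is, before any transformation. A quick sanity check (a sketch with made-up inputs; vp.eval appears in step 3 below):

import numpy as np

check_inputs = dict(x1=np.random.randn(5, 5),
                    x2=np.random.randn(5, 5),
                    w1=np.array(0.7),
                    w2=np.array(0.3))
print(vp.eval(expr, check_inputs))  # a (5, 5) matrix of weighted distances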
2. Transform into a faster Vexpr that would have been difficult to write directly
import numpy as np
example_inputs = dict(
    x1=np.random.randn(10, 5),
    x2=np.random.randn(10, 5),
    w1=np.array(0.7),
    w2=np.array(0.3),
)
expr = vp.vectorize(expr, example_inputs)
print(expr)
# numpy.sum(
#   operator.mul(
#     numpy.reshape(
#       numpy.stack([symbol('w1'), symbol('w2')]),
#       (2, 1, 1),
#     ),
#     custom.scipy.cdist_multi(
#       operator.getitem(
#         symbol('x1'),
#         (Ellipsis, array([0, 1, 2, 0, 3, 4])),
#       ),
#       operator.getitem(
#         symbol('x2'),
#         (Ellipsis, array([0, 1, 2, 0, 3, 4])),
#       ),
#       lengths=array([3, 3])
#     ),
#   )
#   axis=0
# )
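The vectorized expression performs one batched cdist rather than two separate ones, which is where the speedup comes from. A rough timing sketch (hypothetical; assumes you saved the step-1 expression as orig_expr before calling vp.vectorize):

import timeit

t_orig = timeit.timeit(lambda: vp.eval(orig_expr, example_inputs), number=1000)
t_vec = timeit.timeit(lambda: vp.eval(expr, example_inputs), number=1000)
print(f"original: {t_orig:.3f}s, vectorized: {t_vec:.3f}s")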
3. Evaluate the Vexpr, as you would if you were training w1 and w2
inputs = dict(x1=np.random.randn(12, 5),
              x2=np.random.randn(4, 5),
              w1=np.array(0.6),
              w2=np.array(0.4))
print(vp.eval(expr, inputs))
# [[1.55860886 1.81932763 1.36601246 2.74558064]
#  [1.07449014 2.41388948 2.05383731 3.47491204]
#  [3.44607574 4.11058513 1.73149737 3.99700678]
#  [1.42342409 1.89316449 2.36516876 2.61242728]
#  [2.10589466 2.16815159 1.05028078 3.2819643 ]
#  [2.6376981  1.86969234 4.09429083 3.39908103]
#  [2.46510162 2.13610497 2.91302844 3.65995608]
#  [1.65351302 1.66339115 2.56035358 1.93349338]
#  [1.15303396 2.07962417 2.23623819 2.63961701]
#  [2.90055677 1.57172764 3.10181813 2.25698896]
#  [1.83600204 2.63654294 1.22630251 3.47381211]
#  [2.61149285 2.77062418 0.78998639 3.10032325]]
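Each row of x1 is compared against each row of x2, so a (12, 5) x1 and a (4, 5) x2 yield a (12, 4) result:

result = vp.eval(expr, inputs)
print(result.shape)  # (12, 4): one weighted distance per pair of rows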
4. Use partial evaluation to precompute intermediate state, as you would before inference
parameters = dict(w1=0.6, w2=0.4)
expr = vp.partial_eval(expr, parameters)
print(expr)
# numpy.sum(
#   operator.mul(
#     array([[[0.6]],
#            [[0.4]]]),
#     custom.scipy.cdist_multi(
#       operator.getitem(
#         symbol('x1'),
#         (Ellipsis, array([0, 1, 2, 0, 3, 4])),
#       ),
#       operator.getitem(
#         symbol('x2'),
#         (Ellipsis, array([0, 1, 2, 0, 3, 4])),
#       ),
#       lengths=array([3, 3])
#     ),
#   )
#   axis=0
# )
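The weights are now baked into the expression, so only x1 and x2 remain as symbols. A usage sketch:

test_inputs = dict(x1=np.random.randn(3, 5),
                   x2=np.random.randn(2, 5))
print(vp.eval(expr, test_inputs))  # w1 and w2 no longer need to be passed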
PyTorch
1. Create a Vexpr
import vexpr as vp
import vexpr.torch as vtorch
w1 = vp.symbol("w1")
w2 = vp.symbol("w2")
x1 = vp.symbol("x1")
x2 = vp.symbol("x2")
expr = vtorch.sum([w1 * vtorch.cdist(x1[..., [0, 1, 2]], x2[..., [0, 1, 2]]),
                   w2 * vtorch.cdist(x1[..., [0, 3, 4]], x2[..., [0, 3, 4]])],
                  dim=0)
print(expr)
# torch.sum(
#   [operator.mul(
#     symbol('w1'),
#     torch.cdist(
#       operator.getitem(
#         symbol('x1'),
#         (Ellipsis, [0, 1, 2]),
#       ),
#       operator.getitem(
#         symbol('x2'),
#         (Ellipsis, [0, 1, 2]),
#       ),
#     ),
#   ),
#   operator.mul(
#     symbol('w2'),
#     torch.cdist(
#       operator.getitem(
#         symbol('x1'),
#         (Ellipsis, [0, 3, 4]),
#       ),
#       operator.getitem(
#         symbol('x2'),
#         (Ellipsis, [0, 3, 4]),
#       ),
#     ),
#   )]
#   dim=0
# )
2. Transform into a faster Vexpr that would have been difficult to write directly
import torch
example_inputs = dict(
    x1=torch.randn(10, 5),
    x2=torch.randn(10, 5),
    w1=torch.tensor(0.7),
    w2=torch.tensor(0.3),
)
expr = vp.vectorize(expr, example_inputs)
print(expr)
# torch.sum(
#   custom.torch.mul_along_dim(
#     torch.stack([symbol('w1'), symbol('w2')]),
#     custom.torch.cdist_multi(
#       operator.getitem(
#         symbol('x1'),
#         (Ellipsis, tensor([0, 1, 2, 0, 3, 4])),
#       ),
#       operator.getitem(
#         symbol('x2'),
#         (Ellipsis, tensor([0, 1, 2, 0, 3, 4])),
#       ),
#       groups=[((3, 2), 2)]
#     ),
#     dim=0
#   )
#   dim=0
# )
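The vectorized expression should compute the same values as the direct formulation from step 1. A quick sanity check (a sketch with made-up inputs):

check = dict(x1=torch.randn(6, 5), x2=torch.randn(3, 5),
             w1=torch.tensor(0.7), w2=torch.tensor(0.3))
expected = (check["w1"] * torch.cdist(check["x1"][..., [0, 1, 2]], check["x2"][..., [0, 1, 2]])
            + check["w2"] * torch.cdist(check["x1"][..., [0, 3, 4]], check["x2"][..., [0, 3, 4]]))
print(torch.allclose(vp.eval(expr, check), expected))  # expect True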
3. Evaluate the Vexpr, as you would if you were training w1 and w2
inputs = dict(x1=torch.randn(12, 5),
              x2=torch.randn(4, 5),
              w1=torch.tensor(0.6),
              w2=torch.tensor(0.4))
print(vp.eval(expr, inputs))
# tensor([[3.1750, 2.2383, 2.6217, 1.0710],
#         [2.3972, 1.8493, 1.8987, 1.8038],
#         [2.7758, 0.9884, 1.8191, 2.8204],
#         [1.5958, 2.4894, 2.3942, 2.3034],
#         [2.2631, 0.7308, 1.2725, 1.6628],
#         [2.7736, 0.8804, 1.8810, 2.0894],
#         [2.7475, 1.7807, 1.7098, 1.9817],
#         [1.6824, 2.3360, 2.4505, 2.4344],
#         [1.4595, 1.9179, 1.7824, 1.4457],
#         [2.1513, 1.6023, 0.9952, 1.4258],
#         [2.4210, 3.1545, 2.1091, 2.6089],
#         [3.2171, 1.3637, 2.2806, 3.0934]])
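Because vp.eval executes ordinary torch operations on the tensors you pass in, gradients should flow through it (an assumption, consistent with the "as you would if you were training" framing above). A minimal training sketch, with a stand-in loss just for illustration:

w1_val = torch.tensor(0.6, requires_grad=True)
w2_val = torch.tensor(0.4, requires_grad=True)
optimizer = torch.optim.SGD([w1_val, w2_val], lr=0.01)
x1_data, x2_data = torch.randn(12, 5), torch.randn(4, 5)  # made-up data

for _ in range(100):
    optimizer.zero_grad()
    distances = vp.eval(expr, dict(x1=x1_data, x2=x2_data,
                                   w1=w1_val, w2=w2_val))
    loss = distances.mean()  # stand-in objective
    loss.backward()
    optimizer.step()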
4. Use partial evaluation to precompute intermediate state, as you would before inference
parameters = dict(w1=0.6, w2=0.4)
expr = vp.partial_eval(expr, parameters)
print(expr)
# torch.sum(
#   custom.torch.mul_along_dim(
#     tensor([0.6000, 0.4000]),
#     custom.torch.cdist_multi(
#       operator.getitem(
#         symbol('x1'),
#         (Ellipsis, tensor([0, 1, 2, 0, 3, 4])),
#       ),
#       operator.getitem(
#         symbol('x2'),
#         (Ellipsis, tensor([0, 1, 2, 0, 3, 4])),
#       ),
#       groups=[((3, 2), 2)]
#     ),
#     dim=0
#   )
#   dim=0
# )
JAX
Coming soon.