diff --git a/setup.py b/setup.py index b37582f1..d99610a4 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,7 @@ name='spotlight', version=version, packages=find_packages(), + install_requires=['dynarray'], license='MIT', classifiers=['Development Status :: 3 - Alpha', 'License :: OSI Approved :: MIT License', diff --git a/spotlight/evaluation.py b/spotlight/evaluation.py index 54909e82..cd9693e4 100644 --- a/spotlight/evaluation.py +++ b/spotlight/evaluation.py @@ -83,19 +83,22 @@ def sequence_mrr_score(model, test, exclude_preceding=False): Array of MRR scores for each sequence in test. """ - sequences = test.sequences[:, :-1] - targets = test.sequences[:, -1:] - mrrs = [] - for i in range(len(sequences)): + for sequence in test: + + subsequence = sequence[:-1] + target = sequence[-1:] + + if not len(subsequence): + continue - predictions = -model.predict(sequences[i]) + predictions = -model.predict(subsequence) if exclude_preceding: - predictions[sequences[i]] = FLOAT_MAX + predictions[subsequence] = FLOAT_MAX - mrr = (1.0 / st.rankdata(predictions)[targets[i]]).mean() + mrr = (1.0 / st.rankdata(predictions)[target]).mean() mrrs.append(mrr) diff --git a/spotlight/interactions.py b/spotlight/interactions.py index 25b9f2fc..f8f5584c 100644 --- a/spotlight/interactions.py +++ b/spotlight/interactions.py @@ -7,6 +7,12 @@ import scipy.sparse as sp +import torch + +from dynarray import DynamicArray + +from spotlight.torch_utils import gpu + def _sliding_window(tensor, window_size, step_size=1): @@ -14,10 +20,28 @@ def _sliding_window(tensor, window_size, step_size=1): yield tensor[max(i - window_size, 0):i] -def _generate_sequences(user_ids, item_ids, - indices, +def _generate_sequences(interactions, max_sequence_length, - step_size): + min_sequence_length=1, + step_size=1): + + if interactions.timestamps is None: + raise ValueError('Cannot convert to sequences, ' + 'timestamps not available.') + + if step_size is None: + step_size = max_sequence_length + + # Sort 
first by user id, then by timestamp + sort_indices = np.lexsort((interactions.timestamps, + interactions.user_ids)) + + user_ids = interactions.user_ids[sort_indices] + item_ids = interactions.item_ids[sort_indices] + + user_ids, indices, counts = np.unique(user_ids, + return_index=True, + return_counts=True) for i in range(len(indices)): @@ -32,9 +56,30 @@ max_sequence_length, step_size): + if len(seq) < min_sequence_length: + continue + yield (user_ids[i], seq) +def _pack_sequences(sequences): + + packed = {} + + for user_id, sequence in sequences: + + (packed.setdefault(len(sequence), + DynamicArray((None, + len(sequence)), + dtype=np.int64)) + .append(sequence)) + + for value in packed.values(): + value.shrink_to_fit() + + return {key: value[:] for (key, value) in packed.items()} + + class Interactions(object): """ Interactions object. Contains (at a minimum) pair of user-item @@ -167,45 +212,44 @@ def tocsr(self): return self.tocoo().tocsr() - def to_sequence(self, max_sequence_length=10, min_sequence_length=None, step_size=None): + def to_sequence(self, max_sequence_length=10, min_sequence_length=1, step_size=None): """ Transform to sequence form. User-item interaction pairs are sorted by their timestamps, and sequences of up to max_sequence_length events are arranged - into a (zero-padded from the left) matrix with dimensions - (num_sequences x max_sequence_length). + are returned. The returned sequences are not padded, + taking advantage of PyTorch's flexibility. Valid subsequences of users' interactions are returned. For example, if a user interacted with items [1, 2, 3, 4, 5], the - returned interactions matrix at sequence length 5 and step size + returned interactions set at sequence length 5 and step size 1 will be be given by: .. code-block:: python [[1, 2, 3, 4, 5], - [0, 1, 2, 3, 4], - [0, 0, 1, 2, 3], - [0, 0, 0, 1, 2], - [0, 0, 0, 0, 1]] + [1, 2, 3, 4], + [1, 2, 3], + [1, 2], + [1]] At step size 2: .. 
code-block:: python [[1, 2, 3, 4, 5], - [0, 0, 1, 2, 3], - [0, 0, 0, 0, 1]] + [1, 2, 3], + [1]] Parameters ---------- max_sequence_length: int, optional - Maximum sequence length. Subsequences shorter than this - will be left-padded with zeros. + Maximum sequence length. min_sequence_length: int, optional If set, only sequences with at least min_sequence_length - non-padding elements will be returned. + elements will be returned. step-size: int, optional The returned subsequences are the effect of moving a a sliding window over the input. This parameter @@ -219,63 +263,32 @@ def to_sequence(self, max_sequence_length=10, min_sequence_length=None, step_siz The resulting sequence interactions. """ - if self.timestamps is None: - raise ValueError('Cannot convert to sequences, ' - 'timestamps not available.') - if 0 in self.item_ids: raise ValueError('0 is used as an item id, conflicting ' 'with the sequence padding value.') - if step_size is None: - step_size = max_sequence_length - - # Sort first by user id, then by timestamp - sort_indices = np.lexsort((self.timestamps, - self.user_ids)) - - user_ids = self.user_ids[sort_indices] - item_ids = self.item_ids[sort_indices] - - user_ids, indices, counts = np.unique(user_ids, - return_index=True, - return_counts=True) - - num_subsequences = int(np.ceil(counts / float(step_size)).sum()) - - sequences = np.zeros((num_subsequences, max_sequence_length), - dtype=np.int32) - sequence_users = np.empty(num_subsequences, - dtype=np.int32) - for i, (uid, - seq) in enumerate(_generate_sequences(user_ids, - item_ids, - indices, - max_sequence_length, - step_size)): - sequences[i][-len(seq):] = seq - sequence_users[i] = uid - - if min_sequence_length is not None: - long_enough = sequences[:, -min_sequence_length] != 0 - sequences = sequences[long_enough] - sequence_users = sequence_users[long_enough] + sequences = _pack_sequences( + _generate_sequences(self, + max_sequence_length, + min_sequence_length, + step_size) + ) return 
(SequenceInteractions(sequences, - user_ids=sequence_users, num_items=self.num_items)) class SequenceInteractions(object): """ - Interactions encoded as a sequence matrix. + Interactions encoded as sequences. This object is not normally constructed + directly, but rather returned from :func:`~Interactions.to_sequence`. Parameters ---------- - sequences: array of np.int32 of shape (num_sequences x max_sequence_length) - The interactions sequence matrix, as produced by - :func:`~Interactions.to_sequence` + sequences: dict of np.int64 arrays of shape (num_sequences x sequence_length) + The interactions sequence, dictionary grouping all + subsequences into matrices by their length. num_items: int, optional The number of distinct items in the data @@ -289,24 +302,80 @@ class Interactions(object): def __init__(self, sequences, - user_ids=None, num_items=None): + num_items=None): self.sequences = sequences - self.user_ids = user_ids - self.max_sequence_length = sequences.shape[1] + + self._num_sequences = sum(x.shape[0] for x in self.sequences.values()) + self._max_sequence_length = max(x.shape[1] for x in self.sequences.values()) if num_items is None: - self.num_items = sequences.max() + 1 + self.num_items = max(x.max() + 1 for x in self.sequences.values()) else: self.num_items = num_items def __repr__(self): - num_sequences, sequence_length = self.sequences.shape - return ('' + 'sequences x {sequence_length} max sequence length)>' .format( - num_sequences=num_sequences, - sequence_length=sequence_length, + num_sequences=self._num_sequences, + sequence_length=self._max_sequence_length, )) + + def __iter__(self): + + for value in self.sequences.values(): + for row in value: + yield row + + def minibatch(self, batch_size, random_state=None, use_cuda=False, only_full_batches=True): + """ + Iterate over minibatches of the dataset. Each minibatch has the same sequence length, + but the lengths of each minibatch can differ to avoid padding. 
Minibatch order as + well as sequences within a minibatch are shuffled on each epoch. + + Parameters + ---------- + + batch_size: int + The size of each minibatch. Minibatches smaller than this will not be emitted + if `only_full_batches` is `True` (default). + random_state: instance of numpy.random.RandomState, optional + Random generator to use when returning the minibatches. + use_cuda: bool, optional + Whether to send the minibatch data to the GPU. + only_full_batches: bool, optional + If `True`, minibatches smaller than `batch_size` are omitted. This is helpful + if loss values are averaged across minibatch. In that case, minibatches with + few examples would have a disproportionately large effect on the gradients. + """ + + if random_state is None: + random_state = np.random.RandomState() + + # Shuffle the sequences within blocks of same length. + for value in self.sequences.values(): + random_state.shuffle(value) + + # Build a list of minibatches to execute that + # randomly alternates between the lengths. + minibatches = [] + + for key, value in self.sequences.items(): + for i in range(0, len(value), batch_size): + minibatches.append([key, i, i + batch_size]) + + minibatches = np.array(minibatches, dtype=np.int64) + random_state.shuffle(minibatches) + + # Convert to tensors (and possibly transfer to GPU). 
+ tensor_data = {k: gpu(torch.from_numpy(v), use_cuda) for (k, v) in self.sequences.items()} + + for (key, start, stop) in minibatches: + btch = tensor_data[key][start:stop] + + if len(btch) != batch_size: + continue + + yield btch diff --git a/spotlight/sequence/implicit.py b/spotlight/sequence/implicit.py index ba0d2171..86f6414c 100644 --- a/spotlight/sequence/implicit.py +++ b/spotlight/sequence/implicit.py @@ -12,13 +12,14 @@ from torch.autograd import Variable from spotlight.helpers import _repr_model +from spotlight.interactions import SequenceInteractions from spotlight.losses import (adaptive_hinge_loss, bpr_loss, hinge_loss, pointwise_loss) from spotlight.sequence.representations import PADDING_IDX, CNNNet, LSTMNet, PoolNet from spotlight.sampling import sample_items -from spotlight.torch_utils import cpu, gpu, minibatch, set_seed, shuffle +from spotlight.torch_utils import cpu, gpu, set_seed class ImplicitSequenceModel(object): @@ -70,7 +71,7 @@ class ImplicitSequenceModel(object): .. code-block:: python [[1, 2, 3, 4, 5], - [0, 0, 7, 1, 4]] + [7, 1, 4]] In this case, the loss for the first example will be the mean loss @@ -175,7 +176,9 @@ def _initialize(self, interactions): def _check_input(self, item_ids): - if isinstance(item_ids, int): + if isinstance(item_ids, SequenceInteractions): + item_id_max = item_ids.num_items - 1 + elif isinstance(item_ids, int): item_id_max = item_ids else: item_id_max = item_ids.max() @@ -199,25 +202,19 @@ def fit(self, interactions, verbose=False): The input sequence dataset. 
""" - sequences = interactions.sequences.astype(np.int64) - if not self._initialized: self._initialize(interactions) - self._check_input(sequences) + self._check_input(interactions) for epoch_num in range(self._n_iter): - sequences = shuffle(sequences, - random_state=self._random_state) - - sequences_tensor = gpu(torch.from_numpy(sequences), - self._use_cuda) - epoch_loss = 0.0 - for minibatch_num, batch_sequence in enumerate(minibatch(sequences_tensor, - batch_size=self._batch_size)): + for (minibatch_num, + batch_sequence) in enumerate(interactions + .minibatch(batch_size=self._batch_size, + random_state=self._random_state)): sequence_var = Variable(batch_sequence) diff --git a/spotlight/torch_utils.py b/spotlight/torch_utils.py index f425fa30..287204b0 100644 --- a/spotlight/torch_utils.py +++ b/spotlight/torch_utils.py @@ -32,6 +32,19 @@ def minibatch(*tensors, **kwargs): yield tuple(x[i:i + batch_size] for x in tensors) +def minibatch_indices(*tensors, **kwargs): + + batch_size = kwargs.get('batch_size', 128) + + if len(tensors) == 1: + tensor = tensors[0] + for i in range(0, len(tensor), batch_size): + yield tensor[i:i + batch_size] + else: + for i in range(0, len(tensors[0]), batch_size): + yield tuple(x[i:i + batch_size] for x in tensors) + + def shuffle(*arrays, **kwargs): random_state = kwargs.get('random_state') diff --git a/tests/sequence/test_sequence_implicit.py b/tests/sequence/test_sequence_implicit.py index 378c1cc8..d0d2ea79 100644 --- a/tests/sequence/test_sequence_implicit.py +++ b/tests/sequence/test_sequence_implicit.py @@ -103,7 +103,7 @@ def test_implicit_lstm_synthetic(randomness, expected_mrr): embedding_dim=EMBEDDING_DIM, learning_rate=1e-2, l2=1e-7, - n_iter=NUM_EPOCHS * 5, + n_iter=NUM_EPOCHS * 10, random_state=random_state, use_cuda=CUDA) @@ -204,7 +204,7 @@ def test_implicit_pooling_losses(loss, expected_mrr): @pytest.mark.parametrize('compression_ratio, expected_mrr', [ (0.2, 0.14), (0.5, 0.30), - (1.0, 0.5), + (1.0, 0.48), ]) def 
test_bloom_cnn(compression_ratio, expected_mrr): diff --git a/tests/test_interactions.py b/tests/test_interactions.py index 70f13610..5ca21e75 100644 --- a/tests/test_interactions.py +++ b/tests/test_interactions.py @@ -4,59 +4,39 @@ from spotlight.cross_validation import random_train_test_split from spotlight.datasets import movielens -from spotlight.interactions import Interactions +from spotlight.interactions import Interactions, _generate_sequences -def _test_just_padding(sequences): - """ - There should be no rows with only padding in them. - """ - - row_sum = sequences.sum(axis=1) - - assert len(row_sum) == sequences.shape[0] - assert np.all(row_sum > 0) - - -def _test_final_column_no_padding(sequences): - """ - The final column should always have an interaction. - """ - - assert np.all(sequences[:, -1] > 0) - - -def _test_shifted(sequence_users, sequences, step_size): +def _test_shifted(sequences, step_size): """ Unless there was a change of user, row i + 1's interactions should contain row i's interactions shifted to the right by step size. 
""" - for i in range(1, len(sequences)): + previous_uid = None + previous_sequence = None - if sequence_users[i] != sequence_users[i - 1]: - # Change of user - continue + for user_id, sequence in sequences: + if previous_uid == user_id: + assert (np.all(sequence[-len(previous_sequence) + step_size:] == + previous_sequence[:-step_size])) - assert np.all(sequences[i][step_size:] == sequences[i - 1][:-step_size]) + previous_uid = user_id + previous_sequence = sequence -def _test_temporal_order(sequence_users, sequences, interactions): +def _test_temporal_order(sequences, interactions): interaction_matrix = interactions.tocoo() interaction_matrix.data = interactions.timestamps interaction_matrix = interaction_matrix.tocsr().todense() - for i, sequence in enumerate(sequences): - - user_id = sequence_users[i] - nonpadded_sequence = sequence[sequence != 0] + for user_id, sequence in sequences: + for j in range(0, len(sequence) - 1): + item_id = sequence[j] - for j in range(0, len(nonpadded_sequence) - 1): - item_id = nonpadded_sequence[j] - - next_item_id = nonpadded_sequence[j + 1] + next_item_id = sequence[j + 1] item_timestamp = interaction_matrix[user_id, item_id] next_item_timestamp = interaction_matrix[user_id, next_item_id] @@ -69,18 +49,19 @@ def test_known_output_step_1(): interactions = Interactions(np.zeros(5), np.arange(5) + 1, timestamps=np.arange(5)) - sequences = interactions.to_sequence(max_sequence_length=5, - step_size=1).sequences + sequences = list(v.tolist() for (_, v) in _generate_sequences(interactions, + max_sequence_length=5, + step_size=1)) - expected = np.array([ + expected = [ [1, 2, 3, 4, 5], - [0, 1, 2, 3, 4], - [0, 0, 1, 2, 3], - [0, 0, 0, 1, 2], - [0, 0, 0, 0, 1] - ]) + [1, 2, 3, 4], + [1, 2, 3], + [1, 2], + [1] + ] - assert np.all(sequences == expected) + assert sequences == expected def test_known_output_step_2(): @@ -88,16 +69,17 @@ def test_known_output_step_2(): interactions = Interactions(np.zeros(5), np.arange(5) + 1, 
timestamps=np.arange(5)) - sequences = interactions.to_sequence(max_sequence_length=5, - step_size=2).sequences + sequences = list(v.tolist() for (_, v) in _generate_sequences(interactions, + max_sequence_length=5, + step_size=2)) - expected = np.array([ + expected = [ [1, 2, 3, 4, 5], - [0, 0, 1, 2, 3], - [0, 0, 0, 0, 1], - ]) + [1, 2, 3], + [1], + ] - assert np.all(sequences == expected) + assert sequences == expected @pytest.mark.parametrize('max_sequence_length, step_size', [ @@ -113,23 +95,15 @@ def test_to_sequence(max_sequence_length, step_size): interactions = movielens.get_movielens_dataset('100K') _, interactions = random_train_test_split(interactions) - sequences = interactions.to_sequence( - max_sequence_length=max_sequence_length, - step_size=step_size) - - if step_size == 1: - assert sequences.sequences.shape == (len(interactions), - max_sequence_length) - else: - assert sequences.sequences.shape[1] == max_sequence_length + def seqs(): + return _generate_sequences( + interactions, + max_sequence_length=max_sequence_length, + step_size=step_size) - _test_just_padding(sequences.sequences) - _test_final_column_no_padding(sequences.sequences) - _test_shifted(sequences.user_ids, - sequences.sequences, + _test_shifted(seqs(), step_size) - _test_temporal_order(sequences.user_ids, - sequences.sequences, + _test_temporal_order(seqs(), interactions) @@ -138,12 +112,16 @@ def test_to_sequence_min_length(): min_sequence_length = 10 interactions = movielens.get_movielens_dataset('100K') + def seqs(min_sequence_length): + return _generate_sequences( + interactions, + max_sequence_length=10, + min_sequence_length=min_sequence_length, + step_size=1) + # Check that with default arguments there are sequences # that are shorter than we want - sequences = interactions.to_sequence(max_sequence_length=20) - assert np.any((sequences.sequences != 0).sum(axis=1) < min_sequence_length) + assert any(len(v) < min_sequence_length for (_, v) in seqs(min_sequence_length=1)) # But 
no such sequences after we specify min length. - sequences = interactions.to_sequence(max_sequence_length=20, - min_sequence_length=min_sequence_length) - assert not np.any((sequences.sequences != 0).sum(axis=1) < min_sequence_length) + assert not any(len(v) < min_sequence_length for (_, v) in seqs(min_sequence_length=20))