How to use LMDB with PyTorch DataLoader and DistributedDataParallel

1 minute read

Since an open LMDB environment cannot be pickled, the error `TypeError: can't pickle Environment object` occurs when we naively open LMDB inside `data.Dataset.__init__` and then use `data.DataLoader` with DistributedDataParallel (DDP), which pickles the dataset to send it to worker processes. To resolve the error, we need to delay opening the LMDB environment until after the dataset has been pickled:

# This code is modified from a community example (original source link lost)

class my_dataset_LMDB(data.Dataset):
    """Dataset that reads samples from an LMDB database.

    The LMDB environment is opened lazily, on first __getitem__ call,
    rather than in __init__: an open lmdb.Environment cannot be pickled,
    and DataLoader workers / DDP spawning pickle the dataset object.

    Args:
        db_path: Path to the LMDB database (file or directory).
        file_path: Sequence of string keys, one per sample, used to look
            up records in the database.
    """

    def __init__(self, db_path, file_path):
        self.db_path = db_path
        self.file_path = file_path

        # Delay opening the LMDB environment until after initialization to
        # avoid the "can't pickle Environment Object" error.
        self.env = None
        self.txn = None

    def _init_db(self):
        # lock=False also avoids "MDB_READERS_FULL: Environment maxreaders
        # limit reached" when many reader processes share one environment.
        self.env = lmdb.open(
            self.db_path,
            subdir=os.path.isdir(self.db_path),
            readonly=True,
            lock=False,
            readahead=False,
            meminit=False,
        )
        self.txn = self.env.begin()

    def read_lmdb(self, key):
        """Fetch the bytes stored under *key* and view them as a numpy array."""
        lmdb_data = self.txn.get(key.encode())
        # NOTE(review): np.frombuffer defaults to float64 — confirm this
        # matches the dtype that was written into the database.
        lmdb_data = np.frombuffer(lmdb_data)

        return lmdb_data

    def __len__(self):
        # DataLoader requires a length for map-style datasets.
        return len(self.file_path)

    def __getitem__(self, index):
        # Open the environment on first access, inside the worker process.
        if self.env is None:
            self._init_db()

        file_name = self.file_path[index]
        return self.read_lmdb(file_name)

Note: the option `lock=False` also fixes the error `MDB_READERS_FULL: Environment maxreaders limit reached`, which can occur when many reader processes open the same environment.