How to deal with large images in python
Dealing with seismic/medical images is a challenge because of large memory footprint. Here is a nice code using zarr
to address this – see the clean bit of code using itertools
and slice
# based on code from zarr.convenience -- see
# https://zarr.readthedocs.io/en/stable/_modules/zarr/convenience.html
counter_filename = f'pred_counter_{image_id}.zarr'
print(f'zarr filename {counter_filename}')
pred_counter_store = zarr.DirectoryStore(counter_filename)
pred_counter = zarr.creation.open_array(
store=pred_counter_store, mode='r')
mask_filename = f'pred_mask_{image_id}.zarr'
print(f'zarr filename {mask_filename}')
pred_mask_store = zarr.DirectoryStore(mask_filename)
pred_mask = zarr.creation.open_array(
store=pred_mask_store, mode='r')
# this code uses zarr for out of memory chunked arrays
t0 = time.time()
# copy data - N.B., go chunk by chunk to avoid loading everything into memory
shape = pred_counter.shape
chunks = pred_counter.chunks
pred_mask_np = np.zeros(pred_mask.shape, dtype=np.bool)
chunk_offsets = [range(0, s, c) for s, c in zip(shape, chunks)]
for offset in itertools.product(*chunk_offsets):
sel = tuple(slice(o, min(s, o + c))
for o, s, c in zip(offset, shape, chunks))
pred_counter_temp_ = np.copy(pred_counter[sel])
pred_counter_temp_[pred_counter_temp_==0] = 1
pred_mask_np[sel] = np.greater(np.divide(pred_mask[sel], pred_counter_temp_), 0.5)
print(f'Done in {time.time()-t0} sec')