# NvComp Decompression in Python

In [None]:
import cupy as cp
import numpy as np
from kvikio.nvcomp_codec import NvCompBatchCodec
import zstandard as zstd

## Unchunked Decompression

In [None]:
# Build the benchmark input: a 10,000 x 10,000 float32 matrix (~400 MB) of
# uniform random values.
dtype = np.float32
shape = (10_000, 10_000)
# Seed the generator so that every Restart & Run All compresses the same bytes
# and the timings below are comparable between runs.
data_CPU = np.random.default_rng(seed=0).random(shape, dtype=dtype)

# Compress the array's raw bytes on the host with zstd; this single frame is
# the input to both the GPU and the CPU decompression cells.
compressor = zstd.ZstdCompressor()
compressed_data = compressor.compress(data_CPU.tobytes())

### GPU 

In [None]:
%%time
outGPU = cp.empty(shape, dtype=dtype)
# Decode compressed data on GPU
codec = NvCompBatchCodec("zstd")
decompressed_data_byGPU = codec.decode(compressed_data, out = outGPU).view(dtype).reshape(shape) 
# will return to CPU npArray if not given cpArray for output

### CPU

In [None]:
%%time
# Decompress the data using zstd
decompressor = zstd.ZstdDecompressor()
data_decompressed_CPU = decompressor.decompress(compressed_data)

# Convert the bytes back to a NumPy array
decompressed_data_byCPU = np.frombuffer(data_decompressed_CPU, dtype=dtype).reshape(shape)

### Validation

In [None]:
# Copy the GPU result to the host once and reuse it for the comparison.
decompressed_data_byGPU_host = decompressed_data_byGPU.get()
# The GPU and CPU decoders must agree element-for-element ...
np.testing.assert_array_equal(decompressed_data_byGPU_host, decompressed_data_byCPU)
# ... and the decoded data must reproduce the array that was compressed.
# Without this check, two decoders that were wrong in the same way would pass.
np.testing.assert_array_equal(decompressed_data_byCPU, data_CPU)

## Chunked Decompression

In [None]:
# Generate chunks of data
shape_chunk = (100, 10_000)
nChunks = 100
# Derive the combined shape from the chunk layout so the two cannot drift apart.
shape = (nChunks * shape_chunk[0], shape_chunk[1])
dtype = np.float32

# One seeded generator shared by all chunks: reruns see identical input, and
# the generator is not rebuilt for every chunk.
rng = np.random.default_rng(seed=0)
chunks = [rng.random(shape_chunk, dtype=dtype) for _ in range(nChunks)]
chunks_combined = np.concatenate(chunks)
assert chunks_combined.shape == shape, "chunk layout does not match combined shape"
print("Shape of input data: ", chunks_combined.shape)

# Compress the data using zstd unchunked (one frame for the whole array)
compressor = zstd.ZstdCompressor()
compressed_data = compressor.compress(chunks_combined.tobytes())

# Compress the data using zstd chunked (one independent frame per chunk)
compressed_data_chunks = [compressor.compress(c.tobytes()) for c in chunks]

### CPU

In [None]:
%%time
# Decompress the data using zstd
decompressor = zstd.ZstdDecompressor()
data_decompressed_CPU = decompressor.decompress(compressed_data)
nparray_decompressed_CPU = np.frombuffer(data_decompressed_CPU, dtype=dtype).reshape(shape)

In [None]:
# The CPU round trip must reproduce the original concatenated chunks exactly.
assert np.all(chunks_combined == nparray_decompressed_CPU)

### GPU

In [None]:
%%time
codec = NvCompBatchCodec("zstd")
out_buf = [cp.empty_like(c, dtype = dtype) for c in chunks]

decompressed_data_byGPU = codec.decode_batch(compressed_data_chunks, out = out_buf) # will return to CPU npArray if not given cpArray for output
cparray_decompressed_GPU = cp.concatenate(decompressed_data_byGPU, axis = 0).reshape(shape)

### Validation

In [None]:
# Bring the GPU result to the host and require an exact match with the CPU result.
assert (cparray_decompressed_GPU.get() == nparray_decompressed_CPU).all()

# ROOT I/O on GPU

## Current way to get ROOT data onto the GPU
Reads, basket processing, and decompression are all done on the CPU; the resulting arrays are then moved to the GPU.

In [3]:
import uproot
import awkward as ak

In [4]:
%%timeit
path = "/home/fstrug/uscmshome/nobackup/GPU/kvikio_playground/TTToSemiLeptonic_UL18JMENanoAOD-zstd.root:Events"
branches = ["Electron_pt", "Electron_eta", "Electron_phi",
            "Muon_pt", "Muon_eta", "Muon_phi",
            "FatJet_pt", "FatJet_eta", "FatJet_phi",
            "Jet_pt", "Jet_eta", "Jet_phi",
            "MET_pt"]
TTree = uproot.open(path)
events_awkuproot = ak.to_backend(TTree.arrays(branches), "cuda")

1.61 s ± 76.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
# Report which backend holds the uproot-loaded array ("cuda" is expected after
# ak.to_backend). The array is named `events_awkuproot`; the previous name
# `awk_uproot` was never defined in this notebook.
print("Array backend: ", ak.backend(events_awkuproot))

## kvikUproot
Reads*, basket processing, and decompression are done by the GPU.

*cuFile is running in compatibility mode on EAF (no GPUDirect Storage support), so reads do not currently use the GPU.

In [5]:
from kvikUproot_demo import *

In [6]:
%%timeit
path = "/home/fstrug/uscmshome/nobackup/GPU/kvikio_playground/TTToSemiLeptonic_UL18JMENanoAOD-zstd.root"
branches = ["Electron_pt", "Electron_eta", "Electron_phi",
            "Muon_pt", "Muon_eta", "Muon_phi",
            "FatJet_pt", "FatJet_eta", "FatJet_phi",
            "Jet_pt", "Jet_eta", "Jet_phi",
            "MET_pt"]
TTree_name = "Events"
events_kvikIO = kvikuproot_open(path, branches, TTree_name)

1.47 s ± 15.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Validation

In [None]:
# Every requested branch must match element-for-element between the kvikUproot
# result and the uproot result; checking stops at the first mismatch.
assert all(
    ak.all(events_kvikIO[name] == events_awkuproot[name]) for name in branches
)