{ "cells": [ { "cell_type": "markdown", "id": "ee00a3e2", "metadata": {}, "source": [ "# GPU backend" ] }, { "cell_type": "markdown", "id": "58d18a3a-45b1-425a-b822-e8be0a6c0bc0", "metadata": {}, "source": [ "This example depends on data in a file that can be made in the following way.\n", "\n", "```python\n", "import awkward as ak\n", "\n", "def make_data(fn, N=1000000):\n", " part = [[[1, 2, 3], [], [4, 5]],\n", " [[6, 7]]] * N\n", " arr = ak.Array({\"a\": part})\n", " ak.to_parquet(arr, fn, extensionarray=False)\n", "```\n", "\n", "The file cuda-env.yaml can be used to create a functional environment using conda:\n", "```bash\n", "$ conda env create -f example/cuda-env.yaml\n", "```" ] }, { "cell_type": "code", "execution_count": 3, "id": "cefd8e53-a56f-4b0c-88d2-d662d59849a7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('2.7.1', '2024.10.1.dev9+g9f64d31')" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import awkward as ak\n", "import cupy as cp\n", "import cudf\n", "import numpy as np\n", "import akimbo.cudf\n", "import subprocess\n", "\n", "def gpu_mem():\n", " return\n", " print(subprocess.check_output(\"nvidia-smi | grep py\", shell=True).split()[-2].decode())\n", "\n", "ak.__version__, akimbo.__version__" ] }, { "cell_type": "code", "execution_count": 4, "id": "0490043a-564a-4c11-bb0d-a54fb4c6fb10", "metadata": { "scrolled": true }, "outputs": [], "source": [ "df = cudf.read_parquet(\"s.parquet\")\n", "gpu_mem()" ] }, { "cell_type": "code", "execution_count": 5, "id": "e29ff9a4-60e4-4260-9a44-c135ad6d7d6b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a list\n", "dtype: object" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.dtypes" ] }, { "cell_type": "code", "execution_count": 6, "id": "58d16a80-041e-4260-8c56-9de932dde557", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a [[1, 2, 3], [], [4, 5]]\n", "Name: 0, dtype: list" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[0] # each element is list-of-lists" ] }, { "cell_type": "code", "execution_count": 7, "id": "c7b65320-e1fa-44b2-a232-6ffb97ba1d18", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "['Mask',\n", " 'all',\n", " 'almost_equal',\n", " 'angle',\n", " 'annotations',\n", " 'any',\n", " 'apply',\n", " 'argcartesian',\n", " 'argcombinations',\n", " 'argmax',\n", " 'argmin',\n", " 'argsort',\n", " 'array',\n", " 'array_equal',\n", " 'attrs',\n", " 'awkward',\n", " 'backend',\n", " 'behavior',\n", " 'behaviors',\n", " 'broadcast_arrays',\n", " 'broadcast_fields',\n", " 'builder',\n", " 'cartesian',\n", " 'categories',\n", " 'combinations',\n", " 'concatenate',\n", " 'contents',\n", " 'copy',\n", " 'corr',\n", " 'count',\n", " 'count_nonzero',\n", " 'covar',\n", " 'cpp_type',\n", " 'cppyy',\n", " 'drop_none',\n", " 'dt',\n", " 'enforce_type',\n", " 'errors',\n", " 'explode',\n", " 'fields',\n", " 'fill_none',\n", " 'firsts',\n", " 'flatten',\n", " 'forms',\n", " 'forth',\n", " 'from_arrow',\n", " 'from_arrow_schema',\n", " 'from_avro_file',\n", " 'from_buffers',\n", " 'from_categorical',\n", " 'from_cupy',\n", " 'from_dlpack',\n", " 'from_feather',\n", " 'from_iter',\n", " 'from_jax',\n", " 'from_json',\n", " 'from_numpy',\n", " 'from_parquet',\n", " 'from_raggedtensor',\n", " 'from_rdataframe',\n", " 'from_regular',\n", " 'from_tensorflow',\n", " 'from_torch',\n", " 'full_like',\n", " 'highlevel',\n", " 'imag',\n", " 'index',\n", " 'is_categorical',\n", " 'is_none',\n", " 'is_tuple',\n", " 'is_valid',\n", " 'isclose',\n", " 'jax',\n", " 'layout',\n", " 'linear_fit',\n", " 'local_index',\n", " 'mask',\n", " 'max',\n", " 'mean',\n", " 'merge_option_of_records',\n", " 'merge_union_of_records',\n", " 'metadata_from_parquet',\n", " 'min',\n", " 'mixin_class',\n", " 'mixin_class_method',\n", " 'moment',\n", " 'named_axis',\n", " 'nan_to_none',\n", " 'nan_to_num',\n", " 'nanargmax',\n", " 'nanargmin',\n", " 'nanmax',\n", " 'nanmean',\n", " 'nanmin',\n", " 'nanprod',\n", " 'nanstd',\n", " 'nansum',\n", " 'nanvar',\n", " 'nbytes',\n", " 'ndim',\n", " 'num',\n", " 'numba',\n", " 'numba_type',\n", " 'ones_like',\n", " 'operations',\n", " 'pad_none',\n", " 'parameters',\n", " 'positional_axis',\n", " 'prettyprint',\n", " 'prod',\n", " 'ptp',\n", " 'ravel',\n", " 'real',\n", " 'record',\n", " 'round',\n", " 'run_lengths',\n", " 'show',\n", " 'singletons',\n", " 'softmax',\n", " 'sort',\n", " 'std',\n", " 'str',\n", " 'strings_astype',\n", " 'sum',\n", " 'to_arrow',\n", " 'to_arrow_table',\n", " 'to_backend',\n", " 'to_buffers',\n", " 'to_cudf',\n", " 'to_cupy',\n", " 'to_dataframe',\n", " 'to_feather',\n", " 'to_jax',\n", " 'to_json',\n", " 'to_layout',\n", " 'to_list',\n", " 'to_numpy',\n", " 'to_packed',\n", " 'to_parquet',\n", " 'to_parquet_dataset',\n", " 'to_parquet_row_groups',\n", " 'to_raggedtensor',\n", " 'to_rdataframe',\n", " 'to_regular',\n", " 'to_tensorflow',\n", " 'to_torch',\n", " 'tolist',\n", " 'transform',\n", " 'type',\n", " 'types',\n", " 'typestr',\n", " 'typetracer',\n", " 'unflatten',\n", " 'unpack',\n", " 'unzip',\n", " 'validity_error',\n", " 'values_astype',\n", " 'var',\n", " 'where',\n", " 'with_field',\n", " 'with_name',\n", " 'with_named_axis',\n", " 'with_parameter',\n", " 'without_field',\n", " 'without_named_axis',\n", " 'without_parameters',\n", " 'zeros_like',\n", " 'zip']" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# allows all ak.* namespace, many identical to numpy equivalents\n", "dir(df.a.ak)" ] }, { "cell_type": "code", "execution_count": 8, "id": "8ff11e13-8503-4d79-a64c-993028709ca4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(28000000)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.a.ak.sum(axis=None)" ] }, { "cell_type": "code", "execution_count": 9, "id": "2dd99fe5-0523-46c9-87ec-1392070f5139", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "cupy.ndarray" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# if output was array-like, it stays on the GPU\n", "type(_)" ] }, { "cell_type": "code", "execution_count": 11, "id": "9d8e55cf-8cf1-40a0-8733-24b7719f431d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "12.6 ms ± 779 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" ] } ], "source": [ "# fast reduction across three levels of nesting\n", "%timeit df.a.ak.sum(axis=None)" ] }, { "cell_type": "code", "execution_count": 12, "id": "fae94aea-d9cf-4228-bcab-f843c7cc9c98", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 [[-1, -2, -3], [], [-4, -5]]\n", "1 [[-6, -7]]\n", "2 [[-1, -2, -3], [], [-4, -5]]\n", "3 [[-6, -7]]\n", "4 [[-1, -2, -3], [], [-4, -5]]\n", " ... \n", "1999995 [[-6, -7]]\n", "1999996 [[-1, -2, -3], [], [-4, -5]]\n", "1999997 [[-6, -7]]\n", "1999998 [[-1, -2, -3], [], [-4, -5]]\n", "1999999 [[-6, -7]]\n", "Length: 2000000, dtype: list" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# ufunc maintains structure\n", "np.negative(df.a.ak)" ] }, { "cell_type": "code", "execution_count": 10, "id": "1b83da2c-5e15-42f6-b594-f2ebaece5ac8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "256MiB\n" ] } ], "source": [ "gpu_mem() # created new arrays on GPU, made new cuDF series" ] }, { "cell_type": "code", "execution_count": 13, "id": "558ca2c3-d6c7-4404-bcab-557b9b03f795", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 [[2, 3, 4], [], [5, 6]]\n", "1 [[7, 8]]\n", "2 [[2, 3, 4], [], [5, 6]]\n", "3 [[7, 8]]\n", "4 [[2, 3, 4], [], [5, 6]]\n", "dtype: list\n" ] } ], "source": [ "# operator overload\n", "print((df.a.ak + 1).head())" ] }, { "cell_type": "markdown", "id": "bb51c8c3-42cf-4999-b688-67703f7311d2", "metadata": {}, "source": [ "#### numba" ] }, { "cell_type": "code", "execution_count": 14, "id": "d240ea54-87b4-4b99-b67f-b2f885a4bf5e", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "array([15, 13, 15, ..., 13, 15, 13], dtype=int32)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numba.cuda\n", "ak.numba.register_and_check()\n", "\n", "@numba.cuda.jit(extensions=[ak.numba.cuda])\n", "def inner_sum(array, out):\n", " tid = numba.cuda.grid(1)\n", " if tid < len(array):\n", " out[tid] = 0\n", " for x in array[tid]:\n", " for y in x:\n", " out[tid] += y\n", "\n", "out = cp.empty(len(df.a), dtype=\"int32\")\n", "blocksize = 256\n", "numblocks = (len(df.a) + blocksize - 1) // blocksize\n", "\n", "df.a.ak.apply(lambda x: inner_sum[numblocks, blocksize](ak.drop_none(x, axis=0), out))\n", "out\n" ] }, { "cell_type": "code", "execution_count": 15, "id": "73a35144-292f-4b1d-bbc0-4ebba2a84b0d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "16.7 ms ± 233 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" ] } ], "source": [ "%timeit df.a.ak.apply(lambda x: inner_sum[numblocks, blocksize](ak.drop_none(x, axis=0), out))" ] }, { "cell_type": "code", "execution_count": 14, "id": "bb781ca6-bdbd-4659-9885-8c634f490fca", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "264MiB\n" ] } ], "source": [ "gpu_mem() " ] }, { "cell_type": "markdown", "id": "6d1ffd1a-b53b-4657-bab6-9c9223c28808", "metadata": {}, "source": [ "**slice**" ] }, { "cell_type": "code", "execution_count": 16, "id": "d039a508-e77c-4e23-a583-ec7997a88bb1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 [[1], [], [4]]\n", "1 [[6]]\n", "2 [[1], [], [4]]\n", "3 [[6]]\n", "4 [[1], [], [4]]\n", " ... \n", "1999995 [[6]]\n", "1999996 [[1], [], [4]]\n", "1999997 [[6]]\n", "1999998 [[1], [], [4]]\n", "1999999 [[6]]\n", "Length: 2000000, dtype: list" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# pick the first number of the innermost lists, if there is one\n", "df.a.ak[:, :, :1]" ] }, { "cell_type": "code", "execution_count": 17, "id": "f149dfaf-c01e-4d0a-8e01-2d20623d216f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 [1, 2, 3]\n", "1 [6, 7]\n", "2 [1, 2, 3]\n", "3 [6, 7]\n", "4 [1, 2, 3]\n", " ... \n", "1999995 [6, 7]\n", "1999996 [1, 2, 3]\n", "1999997 [6, 7]\n", "1999998 [1, 2, 3]\n", "1999999 [6, 7]\n", "Length: 2000000, dtype: list" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# pick the first inner list of each row\n", "df.a.ak[:, 0, :]" ] }, { "cell_type": "code", "execution_count": null, "id": "5aaf1903-6a6a-456f-89a7-3dedb01520ad", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.9" } }, "nbformat": 4, "nbformat_minor": 5 }