{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ee00a3e2",
   "metadata": {},
   "source": [
    "# GPU backend"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "58d18a3a-45b1-425a-b822-e8be0a6c0bc0",
   "metadata": {},
   "source": [
    "This example depends on data in a file that can be made in the following way.\n",
    "\n",
    "```python\n",
    "import awkward as ak\n",
    "\n",
    "def make_data(fn, N=1000000):\n",
    "    part = [[[1, 2, 3], [], [4, 5]],\n",
    "            [[6, 7]]] * N\n",
    "    arr = ak.Array({\"a\": part})\n",
    "    ak.to_parquet(arr, fn, extensionarray=False)\n",
    "```\n",
    "\n",
    "The file cuda-env.yaml can be used to create a functional environment using conda:\n",
    "```bash\n",
    "$ conda env create -f example/cuda-env.yaml\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cefd8e53-a56f-4b0c-88d2-d662d59849a7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('2.7.1', '2024.10.1.dev9+g9f64d31')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import awkward as ak\n",
    "import cupy as cp\n",
    "import cudf\n",
    "import numpy as np\n",
    "import akimbo.cudf\n",
    "import subprocess\n",
    "\n",
    "def gpu_mem():\n",
    "    return\n",
    "    print(subprocess.check_output(\"nvidia-smi | grep py\", shell=True).split()[-2].decode())\n",
    "\n",
    "ak.__version__, akimbo.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "0490043a-564a-4c11-bb0d-a54fb4c6fb10",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "df = cudf.read_parquet(\"s.parquet\")\n",
    "gpu_mem()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e29ff9a4-60e4-4260-9a44-c135ad6d7d6b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    list\n",
       "dtype: object"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "58d16a80-041e-4260-8c56-9de932dde557",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    [[1, 2, 3], [], [4, 5]]\n",
       "Name: 0, dtype: list"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[0]  # each element is list-of-lists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c7b65320-e1fa-44b2-a232-6ffb97ba1d18",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Mask',\n",
       " 'all',\n",
       " 'almost_equal',\n",
       " 'angle',\n",
       " 'annotations',\n",
       " 'any',\n",
       " 'apply',\n",
       " 'argcartesian',\n",
       " 'argcombinations',\n",
       " 'argmax',\n",
       " 'argmin',\n",
       " 'argsort',\n",
       " 'array',\n",
       " 'array_equal',\n",
       " 'attrs',\n",
       " 'awkward',\n",
       " 'backend',\n",
       " 'behavior',\n",
       " 'behaviors',\n",
       " 'broadcast_arrays',\n",
       " 'broadcast_fields',\n",
       " 'builder',\n",
       " 'cartesian',\n",
       " 'categories',\n",
       " 'combinations',\n",
       " 'concatenate',\n",
       " 'contents',\n",
       " 'copy',\n",
       " 'corr',\n",
       " 'count',\n",
       " 'count_nonzero',\n",
       " 'covar',\n",
       " 'cpp_type',\n",
       " 'cppyy',\n",
       " 'drop_none',\n",
       " 'dt',\n",
       " 'enforce_type',\n",
       " 'errors',\n",
       " 'explode',\n",
       " 'fields',\n",
       " 'fill_none',\n",
       " 'firsts',\n",
       " 'flatten',\n",
       " 'forms',\n",
       " 'forth',\n",
       " 'from_arrow',\n",
       " 'from_arrow_schema',\n",
       " 'from_avro_file',\n",
       " 'from_buffers',\n",
       " 'from_categorical',\n",
       " 'from_cupy',\n",
       " 'from_dlpack',\n",
       " 'from_feather',\n",
       " 'from_iter',\n",
       " 'from_jax',\n",
       " 'from_json',\n",
       " 'from_numpy',\n",
       " 'from_parquet',\n",
       " 'from_raggedtensor',\n",
       " 'from_rdataframe',\n",
       " 'from_regular',\n",
       " 'from_tensorflow',\n",
       " 'from_torch',\n",
       " 'full_like',\n",
       " 'highlevel',\n",
       " 'imag',\n",
       " 'index',\n",
       " 'is_categorical',\n",
       " 'is_none',\n",
       " 'is_tuple',\n",
       " 'is_valid',\n",
       " 'isclose',\n",
       " 'jax',\n",
       " 'layout',\n",
       " 'linear_fit',\n",
       " 'local_index',\n",
       " 'mask',\n",
       " 'max',\n",
       " 'mean',\n",
       " 'merge_option_of_records',\n",
       " 'merge_union_of_records',\n",
       " 'metadata_from_parquet',\n",
       " 'min',\n",
       " 'mixin_class',\n",
       " 'mixin_class_method',\n",
       " 'moment',\n",
       " 'named_axis',\n",
       " 'nan_to_none',\n",
       " 'nan_to_num',\n",
       " 'nanargmax',\n",
       " 'nanargmin',\n",
       " 'nanmax',\n",
       " 'nanmean',\n",
       " 'nanmin',\n",
       " 'nanprod',\n",
       " 'nanstd',\n",
       " 'nansum',\n",
       " 'nanvar',\n",
       " 'nbytes',\n",
       " 'ndim',\n",
       " 'num',\n",
       " 'numba',\n",
       " 'numba_type',\n",
       " 'ones_like',\n",
       " 'operations',\n",
       " 'pad_none',\n",
       " 'parameters',\n",
       " 'positional_axis',\n",
       " 'prettyprint',\n",
       " 'prod',\n",
       " 'ptp',\n",
       " 'ravel',\n",
       " 'real',\n",
       " 'record',\n",
       " 'round',\n",
       " 'run_lengths',\n",
       " 'show',\n",
       " 'singletons',\n",
       " 'softmax',\n",
       " 'sort',\n",
       " 'std',\n",
       " 'str',\n",
       " 'strings_astype',\n",
       " 'sum',\n",
       " 'to_arrow',\n",
       " 'to_arrow_table',\n",
       " 'to_backend',\n",
       " 'to_buffers',\n",
       " 'to_cudf',\n",
       " 'to_cupy',\n",
       " 'to_dataframe',\n",
       " 'to_feather',\n",
       " 'to_jax',\n",
       " 'to_json',\n",
       " 'to_layout',\n",
       " 'to_list',\n",
       " 'to_numpy',\n",
       " 'to_packed',\n",
       " 'to_parquet',\n",
       " 'to_parquet_dataset',\n",
       " 'to_parquet_row_groups',\n",
       " 'to_raggedtensor',\n",
       " 'to_rdataframe',\n",
       " 'to_regular',\n",
       " 'to_tensorflow',\n",
       " 'to_torch',\n",
       " 'tolist',\n",
       " 'transform',\n",
       " 'type',\n",
       " 'types',\n",
       " 'typestr',\n",
       " 'typetracer',\n",
       " 'unflatten',\n",
       " 'unpack',\n",
       " 'unzip',\n",
       " 'validity_error',\n",
       " 'values_astype',\n",
       " 'var',\n",
       " 'where',\n",
       " 'with_field',\n",
       " 'with_name',\n",
       " 'with_named_axis',\n",
       " 'with_parameter',\n",
       " 'without_field',\n",
       " 'without_named_axis',\n",
       " 'without_parameters',\n",
       " 'zeros_like',\n",
       " 'zip']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# allows all ak.* namespace, many identical to numpy equivalents\n",
    "dir(df.a.ak)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "8ff11e13-8503-4d79-a64c-993028709ca4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(28000000)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.a.ak.sum(axis=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "2dd99fe5-0523-46c9-87ec-1392070f5139",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "cupy.ndarray"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# if output was array-like, it stays on the GPU\n",
    "type(_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9d8e55cf-8cf1-40a0-8733-24b7719f431d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "12.6 ms ± 779 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "# fast reduction across three levels of nesting\n",
    "%timeit df.a.ak.sum(axis=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "fae94aea-d9cf-4228-bcab-f843c7cc9c98",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0          [[-1, -2, -3], [], [-4, -5]]\n",
       "1                            [[-6, -7]]\n",
       "2          [[-1, -2, -3], [], [-4, -5]]\n",
       "3                            [[-6, -7]]\n",
       "4          [[-1, -2, -3], [], [-4, -5]]\n",
       "                       ...             \n",
       "1999995                      [[-6, -7]]\n",
       "1999996    [[-1, -2, -3], [], [-4, -5]]\n",
       "1999997                      [[-6, -7]]\n",
       "1999998    [[-1, -2, -3], [], [-4, -5]]\n",
       "1999999                      [[-6, -7]]\n",
       "Length: 2000000, dtype: list"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ufunc maintains structure\n",
    "np.negative(df.a.ak)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "1b83da2c-5e15-42f6-b594-f2ebaece5ac8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "256MiB\n"
     ]
    }
   ],
   "source": [
    "gpu_mem()  # created new arrays on GPU, made new cuDF series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "558ca2c3-d6c7-4404-bcab-557b9b03f795",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    [[2, 3, 4], [], [5, 6]]\n",
      "1                   [[7, 8]]\n",
      "2    [[2, 3, 4], [], [5, 6]]\n",
      "3                   [[7, 8]]\n",
      "4    [[2, 3, 4], [], [5, 6]]\n",
      "dtype: list\n"
     ]
    }
   ],
   "source": [
    "# operator overload\n",
    "print((df.a.ak + 1).head())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bb51c8c3-42cf-4999-b688-67703f7311d2",
   "metadata": {},
   "source": [
    "#### numba"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "d240ea54-87b4-4b99-b67f-b2f885a4bf5e",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([15, 13, 15, ..., 13, 15, 13], dtype=int32)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numba.cuda\n",
    "ak.numba.register_and_check()\n",
    "\n",
    "@numba.cuda.jit(extensions=[ak.numba.cuda])\n",
    "def inner_sum(array, out):\n",
    "    tid = numba.cuda.grid(1)\n",
    "    if tid < len(array):\n",
    "        out[tid] = 0\n",
    "        for x in array[tid]:\n",
    "            for y in x:\n",
    "                out[tid] += y\n",
    "\n",
    "out = cp.empty(len(df.a), dtype=\"int32\")\n",
    "blocksize = 256\n",
    "numblocks = (len(df.a) + blocksize - 1) // blocksize\n",
    "\n",
    "df.a.ak.apply(lambda x: inner_sum[numblocks, blocksize](ak.drop_none(x, axis=0), out))\n",
    "out\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "73a35144-292f-4b1d-bbc0-4ebba2a84b0d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "16.7 ms ± 233 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit df.a.ak.apply(lambda x: inner_sum[numblocks, blocksize](ak.drop_none(x, axis=0), out))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "bb781ca6-bdbd-4659-9885-8c634f490fca",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "264MiB\n"
     ]
    }
   ],
   "source": [
    "gpu_mem() "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6d1ffd1a-b53b-4657-bab6-9c9223c28808",
   "metadata": {},
   "source": [
    "**slice**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "d039a508-e77c-4e23-a583-ec7997a88bb1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0          [[1], [], [4]]\n",
       "1                   [[6]]\n",
       "2          [[1], [], [4]]\n",
       "3                   [[6]]\n",
       "4          [[1], [], [4]]\n",
       "                ...      \n",
       "1999995             [[6]]\n",
       "1999996    [[1], [], [4]]\n",
       "1999997             [[6]]\n",
       "1999998    [[1], [], [4]]\n",
       "1999999             [[6]]\n",
       "Length: 2000000, dtype: list"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# pick the first number of the innermost lists, if there is one\n",
    "df.a.ak[:, :, :1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "f149dfaf-c01e-4d0a-8e01-2d20623d216f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0          [1, 2, 3]\n",
       "1             [6, 7]\n",
       "2          [1, 2, 3]\n",
       "3             [6, 7]\n",
       "4          [1, 2, 3]\n",
       "             ...    \n",
       "1999995       [6, 7]\n",
       "1999996    [1, 2, 3]\n",
       "1999997       [6, 7]\n",
       "1999998    [1, 2, 3]\n",
       "1999999       [6, 7]\n",
       "Length: 2000000, dtype: list"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# pick the first inner list of each row\n",
    "df.a.ak[:, 0, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5aaf1903-6a6a-456f-89a7-3dedb01520ad",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}