class DaskAwkwardAccessor(LazyAccessor):
    """Apply awkward-array operations to a dask series or dataframe.

    Every operation is lazy, as is usual for dask. The work is expressed as
    a mapping over partitions, so anything acting on axis==0 or 1 yields one
    result per partition, and combining those results is left to the caller.

    To perform intra-partition operations, the recommended route is the
    ``.to_dask_awkward`` method.

    Arrow dtypes are deduced correctly when the input is itself arrow-backed,
    which is now the default for the dask "dataframe.dtype_backend" config
    option.
    """

    series_type = dd.Series
    dataframe_type = dd.DataFrame

    @staticmethod
    def _to_tt(data):
        """Return a typetracer-backed awkward array describing ``data``.

        If ``data`` exposes a ``_meta`` attribute, that object is converted
        instead of ``data`` itself.
        """
        source = getattr(data, "_meta", data)
        arrow_data = PandasAwkwardAccessor.to_arrow(source)
        # A zero-length ChunkedArray is collapsed into a single array first;
        # presumably ak.from_arrow cannot handle it otherwise -- TODO confirm.
        if isinstance(arrow_data, pa.ChunkedArray) and len(arrow_data) == 0:
            arrow_data = arrow_data.combine_chunks()
        traced = ak.from_arrow(arrow_data)
        return ak.to_backend(traced, "typetracer")

    def to_dask_awkward(self):
        """Convert to a dask-awkward Array object.

        This makes a single complex awkward array type out of one or more
        columns. You would do this in order to use dask-awkward's more
        advanced inter-partition aggregations and column/buffer IO
        optimisation.

        See https://dask-awkward.readthedocs.io/

        c.f., dask_awkward.to_dataframe
        """
        import dask_awkward as dak

        collection = self._obj
        meta = self._to_tt(collection)
        return dak.lib.core.new_array_object(
            collection.dask,
            divisions=collection.divisions,
            name=collection._name,
            meta=meta,
        )

    def __getattr__(self, item):
        """Resolve ``item`` to a lazily applied per-partition operation.

        * With an active subaccessor and a string ``item``, the name is first
          looked up on that subaccessor and the result replaces ``item``.
        * Otherwise, a string ``item`` that names a subaccessor returns a new
          ``DaskAwkwardAccessor`` bound to that subaccessor.
        * In every other case a ``select`` callable is returned, which maps
          the operation over the partitions of the wrapped dask object.
        """
        if self.subaccessor and isinstance(item, str):
            item = getattr(self.subaccessors[self.subaccessor], item)
        elif isinstance(item, str) and item in self.subaccessors:
            return DaskAwkwardAccessor(
                self._obj, subaccessor=item, behavior=self._behavior
            )

        def select(*inargs, where=None, **kwargs):
            """Apply the resolved operation to every partition.

            ``where`` optionally names the field the operation acts on; the
            result is then written back into that field of the full array.
            """

            # NOTE(review): the name ``func2`` is kept as-is because this
            # function is handed to map_partitions, which may derive task
            # names from it -- rename with care.
            def func2(data):
                # Imported for its side effect; presumably this registers the
                # ``.ak`` accessor used on ``data`` below -- TODO confirm.
                import akimbo.pandas  # noqa: F401

                # Unwrap accessor arguments to the objects they wrap.
                operands = [
                    arg._obj if isinstance(arg, DaskAwkwardAccessor) else arg
                    for arg in inargs
                ]

                if isinstance(item, str):
                    # A name: delegate to the pandas accessor's method.
                    method = getattr(PandasAwkwardAccessor(data), item)
                    return method(*operands, **kwargs)

                # A callable: awkward in, awkward out.
                array = data.ak.array
                if where:
                    selected = array[where]
                    array = ak.with_field(array, selected, where)
                    operands = [operand[where] for operand in operands]
                    computed = item(selected, *operands, **kwargs)
                    result = ak.with_field(array, computed, where)
                else:
                    result = item(array, *operands, **kwargs)

                as_arrow = ak.to_arrow(result, extensionarray=False)
                return pd.Series(pd.arrays.ArrowExtensionArray(as_arrow))

            # Run once on the head of the data to obtain the output meta.
            sample = self._obj.head()
            meta = func2(sample)
            return self._obj.map_partitions(func2, meta=meta)

        return select

    def __dir__(self) -> list[str]:
        """List the parent's attribute names plus ``to_dask_awkward``, sorted."""
        names = super().__dir__() + ["to_dask_awkward"]
        names.sort()
        return names