U
    /ed                     @   s>   d dl Zd dlmZ d dlmZmZ G dd dZdd ZdS )    N)_get_pyarrow_dtypes_meta_from_dtypesc                   @   s<   e Zd Zedd Zedd Zedd Zedd Zd	S )
ArrowORCEnginec              
   K   s  t |dkr(||d s(||d }d }g }	dd }
|rd}|D ]}||d}t|}|d krr|j}n||jkrtdtt	|j
}|r|	||d| fg ||j
k r|	||||t|  fg |t|7 }q|rt|dkr||j
8 }nd}W 5 Q R X qFnR|D ]L}|d krV||d d}t|}|j}W 5 Q R X |	|d fg qt|d d}|d k	rt|t| }|rtd| d	t| d
| |||	}	|d krt|n|}t|tr|gn|}t|||g }|	||fS )N   r   c                 S   s&   |d kr| j }n|| j kr"td|S )N,Incompatible schemas while parsing ORC files)schema
ValueError)Z_or    r	   ?/tmp/pip-unpacked-wheel-dbjnr7gq/dask/dataframe/io/orc/arrow.py_get_schema   s
    
z1ArrowORCEngine.read_metadata.<locals>._get_schemarbr   )
categorieszRequested columns (z) not in schema ())lenisfilefindopenorcORCFiler   r   listrangenstripesappendintr   set_aggregate_files
isinstancestrr   )clsfspathscolumnsindexsplit_stripesaggregate_fileskwargsr   partsr   offsetpathfo_stripesexmetar	   r	   r
   read_metadata   sV    





zArrowORCEngine.read_metadatac           	      C   s   |dkrt |dkrt|dkrg }|d }t|d d }|dd  D ]J}t|d d }|| |kr||d  ||7 }qH|| |}|}qH|| |S |S d S )NTr   r   )r   r   r   )	r   r$   r#   r&   Z	new_partsZnew_partr   partZnext_nstripesr	   r	   r
   r   P   s     


zArrowORCEngine._aggregate_filesc           	      K   s:   g }|D ]\}}|t |||||7 }qtj|jddS )NF)Zdate_as_object)_read_orc_stripespaTableZfrom_batchesZ	to_pandas)	r   r   r&   r   r!   r%   batchesr(   stripesr	   r	   r
   read_partitiond   s    zArrowORCEngine.read_partitionc              	   K   s@   t j|}||j||gd}t|| W 5 Q R X d S )Nwb)r1   r2   Zfrom_pandasr   sepjoinr   Zwrite_table)r   Zdfr(   r   filenamer%   tabler)   r	   r	   r
   write_partitionk   s    zArrowORCEngine.write_partitionN)__name__
__module____qualname__classmethodr.   r   r5   r;   r	   r	   r	   r
   r      s   
G

r   c           
   	   C   sl   |d krt |}g }| |dB}t|}|d kr>t|jn|}|D ]}	|||	| qFW 5 Q R X |S )Nr   )r   r   r   r   r   r   r   Zread_stripe)
r   r(   r4   r   r!   r3   r)   r*   r+   Zstriper	   r	   r
   r0   r   s    
r0   )	Zpyarrowr1   Zpyarrow.orcr   Zdask.dataframe.io.utilsr   r   r   r0   r	   r	   r	   r
   <module>   s   k