U
    f/e8                     @  s   d dl mZ d dlZd dlZd dlm  mZ d dlm	Z	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ G d
d deZdddddZdd ZdS )    )annotationsN)	ArrayLikeFilePathOrBuffer)DtypeWarning)is_categorical_dtypepandas_dtype)union_categoricals)ExtensionDtype)ensure_index_from_sequences)
ParserBaseis_index_colc                      sv   e Zd ZU ded< ded< ddddZd	d
 fddZdd ZdddZdd Zdd Z	ddddddZ
  ZS )CParserWrapperbool
low_memoryzparsers.TextReader_readerr   )srcc                   s  | _ | }t | |dd _ jdk	|d<  j|d<  ||  j	d k	sZt
 jj|d< dD ]}||d  qjt|dd |d< ztj j	jf| _W n  tk
r    j	   Y nX  jj _ jd k} jjd krd  _nLt jjdkr,  jj j j|\ _ _ _}nt jjd	  _ jd kr jrp fd
dt jjD  _ntt jj _ jd d   _  jr6 ! j j  j d k	st
 j"dkrt#$ j s % j  t jtkrfddt& jD  _t jtk r6 % j  ' j  (   j _  j)s jj*d	krt+ jrd _, - j j j\} _ _ jd kr| _ jjd kr|s jd k	st
d gt j  _ jj*d	k _.d S )Nr   FZallow_leading_colsusecolson_bad_lines)Zstorage_optionsencodingZ
memory_mapcompressionZerror_bad_linesZwarn_bad_linesdtype   r   c                   s   g | ]} j  | qS  )prefix).0iselfr   F/tmp/pip-unpacked-wheel-tiezk1ph/pandas/io/parsers/c_parser_wrapper.py
<listcomp>j   s    z+CParserWrapper.__init__.<locals>.<listcomp>stringc                   s$   g | ]\}}| ks| kr|qS r   r   )r   r   nr   r   r   r      s    T)/kwdscopyr   __init__popr   	index_colr   Z_open_handlesZhandlesAssertionErrorr   valueensure_dtype_objsgetparsersZ
TextReaderhandler   	Exceptioncloseunnamed_colsnamesheaderlenZ_extract_multi_indexer_columnsindex_names	col_nameslistr   rangeZtable_width
orig_names_evaluate_usecolsZusecols_dtypesetissubsetZ_validate_usecols_names	enumerateZ_validate_parse_dates_presence_set_noconvert_columns_has_complex_date_colleading_colsr   Z_name_processed_clean_index_namesZ_implicit_index)r   r   r#   keyZpassed_namesr4   r   )r   r   r   r%   !   s    









	zCParserWrapper.__init__None)returnc                   s2   t    z| j  W n tk
r,   Y nX d S N)superr/   r   
ValueErrorr   	__class__r   r   r/      s
    
zCParserWrapper.closec                   sJ    j dk	st fdd jD } | j}|D ]} j| q4dS )z
        Set the columns that should not undergo dtype conversions.

        Currently, any column that is involved with date parsing will not
        undergo such conversions.
        Nc                   s   g | ]} j |qS r   )r8   indexr   xr   r   r   r      s    z9CParserWrapper._set_noconvert_columns.<locals>.<listcomp>)r8   r(   r1   Z_set_noconvert_dtype_columnsr   Zset_noconvert)r   Zcol_indicesZnoconvert_columnscolr   r   r   r=      s    
z%CParserWrapper._set_noconvert_columnsNc              
     s`  z,| j r| j|}t|}n| j|}W n tk
r   | jrd| _| | j}| j	|| j
| j| jdd\} }|  | j | jd k	r|    fdd| D }| |f Y S |    Y nX d| _| j}| jjr| jrtdg }t| jjD ]H}| j
d kr||}	n|| j
| }	| j|	|dd}	||	 qt|}| jd k	rj| |}| |}t| }
d	d t||
D }| ||\}}nt| }
| jd k	st t!| j}| |}| jd k	r| |}d
d |
D }| jd kr| "|| dd t||
D }| ||\}}| #|||\}}| || j}|||fS )NFr   r   c                   s   i | ]\}}| kr||qS r   r   )r   kvcolumnsr   r   
<dictcomp>   s       z'CParserWrapper.read.<locals>.<dictcomp>z file structure not yet supportedT)try_parse_datesc                 S  s   i | ]\}\}}||qS r   r   r   rN   r   rO   r   r   r   rR     s    
  c                 S  s   g | ]}|d  qS )r   r   rJ   r   r   r   r   .  s     z'CParserWrapper.read.<locals>.<listcomp>c                 S  s   i | ]\}\}}||qS r   r   rT   r   r   r   rR   2  s    
  )$r   r   Zread_low_memory_concatenate_chunksreadStopIterationZ_first_chunkZ_maybe_dedup_namesr8   Z_get_empty_metar'   r4   r#   r+   Z_maybe_make_multi_index_columnsr5   r   _filter_usecolsitemsr/   r1   r?   r>   NotImplementedErrorr7   r&   _maybe_parse_datesappendr
   sortedzipZ_do_date_conversionsr(   r6   Z_check_data_lengthZ_make_index)r   Znrowschunksdatar1   rI   Zcol_dictZarraysr   valuesZ	data_tupsZalldatar   rP   r   rV      sn    









zCParserWrapper.readc                   s@   |  | j|  d k	r<t|t kr< fddt|D }|S )Nc                   s$   g | ]\}}| ks| kr|qS r   r   )r   r   namer"   r   r   r   @  s      z2CParserWrapper._filter_usecols.<locals>.<listcomp>)r9   r   r3   r<   )r   r1   r   r"   r   rX   <  s    
zCParserWrapper._filter_usecolsc                 C  sL   t | jjd }d }| jjdkrD| jd k	rD| || j| j\}}| _||fS )Nr   )r6   r   r2   r?   r'   r@   r0   )r   r1   Z	idx_namesr   r   r   _get_index_namesE  s      zCParserWrapper._get_index_namesTint)rI   rS   c                 C  s   |r|  |r| |}|S rD   )Z_should_parse_datesZ
_date_conv)r   ra   rI   rS   r   r   r   r[   P  s    
z!CParserWrapper._maybe_parse_dates)N)T)__name__
__module____qualname____annotations__r%   r/   r=   rV   rX   rc   r[   __classcell__r   r   rG   r   r      s   
 	
`	r   zlist[dict[int, ArrayLike]]dict)r_   rC   c                   s  t | d  }g }i }|D ]  fdd| D }dd |D }dd |D }t|dkr|t|g }|tkr||t  | }t	|rt
|dd	| < qt|tr| }	|	|| < qt|| < q|rd
|}
dd|
 dg}tj|tdd |S )z
    Concatenate chunks of data read with low_memory=True.

    The tricky part is handling Categoricals, where different chunks
    may have different inferred categories.
    r   c                   s   g | ]}|  qS r   )r&   )r   chunkrb   r   r   r   b  s     z'_concatenate_chunks.<locals>.<listcomp>c                 S  s   h | ]
}|j qS r   rM   )r   ar   r   r   	<setcomp>d  s     z&_concatenate_chunks.<locals>.<setcomp>c                 S  s   h | ]}t |s|qS r   )r   rJ   r   r   r   rn   f  s      r   F)Zsort_categories, z	Columns (zJ) have mixed types.Specify dtype option on import or set low_memory=False.   )
stacklevel)r6   keysr3   npZfind_common_typeobjectr\   strr&   r   r   
isinstancer	   Zconstruct_array_typeZ_concat_same_typeZconcatenatejoinwarningswarnr   )r_   r1   Zwarning_columnsresultZarrsZdtypesZnumpy_dtypesZcommon_typer   Z
array_typeZwarning_namesZwarning_messager   rl   r   rU   V  s>    



rU   c                   s2   t  tr fdd D  n dk	r.t   S )zc
    Ensure we have either None, a dtype object, or a dictionary mapping to
    dtype objects.
    c                   s   i | ]}|t  | qS r   )r   )r   rN   rM   r   r   rR     s      z%ensure_dtype_objs.<locals>.<dictcomp>N)rw   rj   r   rM   r   rM   r   r*     s
    
r*   )
__future__r   ry   Znumpyrt   Zpandas._libs.parsersZ_libsr,   Zpandas._typingr   r   Zpandas.errorsr   Zpandas.core.dtypes.commonr   r   Zpandas.core.dtypes.concatr   Zpandas.core.dtypes.dtypesr	   Zpandas.core.indexes.apir
   Zpandas.io.parsers.base_parserr   r   r   rU   r*   r   r   r   r   <module>   s     ;;