o
    1فi                     @   sX  d Z ddlZddlZddlmZmZmZmZm	Z	m
Z
mZmZmZ ddlmZ ddlmZ dd Zdd Zejjejd	d
dgejdddgejdddgejjdddddggedejjddddggedejdd
dgejdddgejdd
dgejdd
dgejdd
dgdd Zejd d
dgd!d" Zejd#d$d%gg d&gd'd( Zejd#d$d%gg d&gd)d* Zd+d, Zd-d. Zejd/d0 Z d1d2 Z!d3d4 Z"d5d6 Z#d7d8 Z$ejd9g d:ejdddgejd;g d<ejd=d
dgejd>d
dgd?d@ Z%ejdAd
dgejdBddg dCg dDg dEfd
dg dFg dGg dHfd
d
g dIg dJg dKfgdLdM Z&ejdNdO Z'ejdPddddQg dRg dSfd
d
ddQg dTg dUfd
dddQg dVg dWfd
dd
dXg dYg dWfgdZd[ Z(ejd\d] Z)ejd^ddg d_g d`fdd
g dag dbfd
dg dcg ddfd
d
g deg dffgdgdh Z*ejdidj Z+ejdkd
dldlge
j,g dmg dndofdg dpe
e	dlge	g dqe	drdsej-ggg dtg dug dvgg dndwfgejdddgdxdy Z.ejd=dd
gejdzdd
gejd{ddQej/g d|ej0d}fd
dXe/g d~fgdd Z1dd Z2ejd=d
dgejd{ddQej/g d|ej0d}fd
dXe/g d~fgdd Z3ejd=d
dgejd{ddQej/g dej0d}fd
dXe/g dfgdd Z4ejd=d
dgejddg dfd
g dfgejd{ddQej/g dej0d}fd
dXe/g dfgdd Z5ejd=dd
gejdzdd
gejd{ddQej/g d|ej0d}fd
dXe/g d~fgdd Z6ejdddQg dfd
dXg dffgdd Z7ejdde8dg dfde8ddg g dfgejd=dd
gdd Z9ejdddgdd Z:dd Z;dd Z<dd Z=dd Z>dd Z?ejd d
dgdd Z@dd ZAejdd
dgejdAd
dgdd ZBejdd
dgejdAd
dgdd ZCdS )z
these are systematically testing all of the args to value_counts
with different size combinations. This is to ensure stability of the sorting
and proper parameter handling
    N)	CategoricalCategoricalIndex	DataFrameGrouperIndex
MultiIndexSeries
date_rangeto_datetime)Versionc                  C   s   t dgdgd} | d d| d< | dd  }t ddggddgd}|d d|d< t|}tdg|d	d
}t|| d S )NfemaleUS)gendercountryr   categoryr   columns   countindexname)	r   astypegroupbyvalue_countsr   
from_framer   tmassert_series_equal)dfresultdf_mi_expectedmi_expectedexpected r#   o/home/test-dt/pcm-url-check/venv/lib/python3.10/site-packages/pandas/tests/groupby/methods/test_value_counts.py.tests_value_counts_index_names_category_column   s   
r%   c                 C   s   t ddd}ttjdtd|tjd||tjdd|d |d}| rm|d d	|d< tj	|j
dd d
df< tj	|j
dd ddf< tj	|j
dd ddf< tj	|j
dd ddf< tj	|j
dd ddf< |S )Nz
2015-08-24
   )periods   abcdr   )1st2nd3rdr,   float   r*         r+            	   )r	   r   nprandomdefault_rngchoicelistintegersr   nanloc)	seed_nansnmdaysframer#   r#   r$   seed_df.   s   rB   r=   TFnum_rowsr&   2   max_int      keysr*   r+   )idsbinsisortznormalize, name)T
proportion)Fr   sort	ascendingdropnac                 C   s   t | ||}dd }|||	|
|d}|j||d}|d jdi |}|j||d}|d jtjfi |}|jjd d dg |j_||}t|||f\}}t	
| |  d S )Nc                 S   s2   t t| jjt| jj}tj|| jjd| _| S )Nnames)	r9   mapr   get_level_valuesrangenlevelsr   from_arraysrQ   )r   arrr#   r#   r$   rebuild_index_   s   z7test_series_groupby_value_counts.<locals>.rebuild_index)	normalizerM   rN   rO   rJ   rM   r,   r#   )rB   r   r   applyr   r   rQ   renamerR   r   r   
sort_index)r=   rC   rE   rH   rJ   rK   rY   r   rM   rN   rO   r   rX   kwargsgrleftrightr#   r#   r$    test_series_groupby_value_countsE   s    
rc   utcc                 C   s   t g dg dddg}t|d | dd|d< |td	dd
}|d   }|d tj }|j	j
|j	_
|d}t|| d S )NiGI]i)J]iJ]iK]i)<M]iU=M]iN]applerg   bananarh   orangeri   pear	TimestampFoodr/   rl   srd   unitDatetime1Dfreqkeyrm   r   )r   dropr
   r   r   r   r^   r\   r   r   rQ   r]   r   r   )rd   r   dfgr   r"   r#   r#   r$   -test_series_groupby_value_counts_with_grouperz   s   	
rx   r   AB)ry   rz   Cc                 C   sf   t | d}|| d d }|| d   }tg |jdd}tjg gt|  | d|_t	
|| d S )Nr   r[   r   )dtyper   rP   )r   r   r   r   r|   r   rV   lenr   r   r   r   r   rw   r   r"   r#   r#   r$   &test_series_groupby_value_counts_empty   s   
r   c                 C   sP   t tt| g| d}|| d d }|| d   }| }t|| d S )N)datar   r[   )r   rT   r}   r   r   r   r   r~   r#   r#   r$   (test_series_groupby_value_counts_one_row   s
   r   c                  C   sp   t tdgddgd} | dg }t ddgttddgtddgddgdddgd	d
}t	
|| d S )Nab)
categoriesr   r   Fr   )r   orderedr|   r   r   r   r   )r   r   r   r   r   rV   r5   arrayr   r   r   )rn   r   r"   r#   r#   r$   /test_series_groupby_value_counts_on_categorical   s   r   c                  C   s   t g dg dg dd} | jddgddd	 }|jdd}td
dgddgg dgg dg dg dgg dd}tg d|dd}t|| d S )Nmaler   r   r   r   r   lowmediumhighr   r   r   r   FRr   r   r   r   r   	educationr   r   r   FrZ   r   r   r   r   r   )r   r   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   r(   r   r(   r   r   r   levelscodesrQ   r   r   r   r(   r   r   r   )r   r   r   r   r   r   r   )r   gbr   r   r"   r#   r#   r$   (test_series_groupby_value_counts_no_sort   s   r   c                   C   s   t g dg dg ddS )Nr   r   r   r   r   r#   r#   r#   r$   education_df   s   r   c                 C   s|   d}t jt|d | jddd}W d    n1 sw   Y  tjtdd |  W d    d S 1 s7w   Y  d S )Nz+DataFrame.groupby with axis=1 is deprecatedmatchr   r   axisr   )r   assert_produces_warningFutureWarningr   pytestraisesNotImplementedErrorr   )r   msggpr#   r#   r$   	test_axis   s   
"r   c                 C   sL   |  d}tjtdd |jdgd W d    d S 1 sw   Y  d S )Nr   subsetr   r   )r   r   r   
ValueErrorr   )r   r   r#   r#   r$   test_bad_subset   s   
"r   c                 C   sv   t tjt dkr|tjjddd | dddg jdd	}t	g d
t
jg dg dddd}t|| d S )N1.25Ypandas default unstable sorting of duplicatesissue with numpy>=1.25 with AVX instructionsFreasonstrictr   r   r   TrY   )      ?      ?r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rP   rL   r   )r   r5   __version__applymarkerr   markxfailr   r   r   r   from_tuplesr   r   )r   requestr   r"   r#   r#   r$   
test_basic   s&   	
r   c                 C   s   | | j |||dS )NrY   rM   rN   )r   )r   rH   rY   rM   rN   r#   r#   r$   _frame_value_counts  s   r   r   columnr   functionzsort, ascending))FN)TT)TFas_indexrA   c
                    s  t tjt dkr|r|r|r|tjjddd d d j fddd| }
 j|
|d	}|d
dg j	|||d}|r|dkrDt
nd }d}tj||d |td
dg|||}W d    n1 sew   Y  |rtt|| d S |rxdnd}| jd|idd}|dkr|jddidd}t|d dd|d< n|dkr|d dk|d< nt|d dd|d< t|| d S  d
 d  d   d< |d j	|||d}||_|r,|jjdd}|d jdjd|d
< |d jdjd|d< |d= |jdd idd}t||_|j d r$d g|jjdd   |j_t|| d S |dd
|d jdjd |dd|d jdjd |	rY| ddd}|d= t|| d S )Nr   r   Fr   r   c                    s    d |  dkS )Nr   r   r#   )xr   r#   r$   <lambda>K      z6test_against_frame_and_seriesgroupby.<locals>.<lambda>r   )byr   r   r   r   r   z7DataFrameGroupBy.apply operated on the grouping columnsr   rL   r   r   r   r   level_0r   r   r   -bothr   r(   str)r   r   )!r   r5   r   r   r   r   r   valuesr   r   r   r   r   r\   r   r   reset_indexr]   whereassert_frame_equalr   r   to_framer   splitgetr   r   r   isnarQ   insertr   )r   r   rY   r   rM   rN   r   rA   r   using_infer_stringr   r   r   warnr   r"   index_frameindex_frame2r#   r   r$   $test_against_frame_and_seriesgroupby  sr    

""r   rY   zCsort, ascending, expected_rows, expected_count, expected_group_size)r   r   r(   r/      r   )r   r/   r   r/   r   )r/   r   r   r(   r   )r(   r   r   r   r   )r/   r   r/   r   r   )r   r   r(   r   r/   )r   r   r   r   r(   )r   r/   r   r   r/   c	                    s  |}	 |	j |	_jddgddd}
|
d j|||d}t }dD ]  fdd	|D | < | |	}|j |	|_q'|r]||d
< |d
  |  < |	dkr\|d
  |d
< n||d< |	dkrm|d  |d< |r||	tkr|| dddd}t|| d S )Nr   r   Fr   rM   r   r   r   c                       g | ]}  | qS r#   r#   .0rowr   r   r#   r$   
<listcomp>      z!test_compound.<locals>.<listcomp>rL   zstring[pyarrow]r   r   )	r   r   r   r   r   convert_dtypesobjectr   r   )r   rY   rM   rN   expected_rowsexpected_countexpected_group_sizeany_string_dtyper   r|   r   r   r"   r#   r   r$   test_compound  s4   


r   c                   C   s$   t g dg dg ddg ddS )Nr   r   r   r   )r(   r   r      )r(   r   r   r   ru   num_legs	num_wings)falcondogcatantr   r   r#   r#   r#   r$   
animals_df  s   r   z?sort, ascending, normalize, name, expected_data, expected_indexr   r   r(   r   )r   r   r   )r(   r   r   r(   r   r   r   r   r(   )r   )r(   r   r   r   r(   r   r   )r   )r   r(   r   )r   r(   r   rL   )r   r   r   c           
      C   s^   | j |||d}t|tj|g dd|d}t|| | dj |||d}	t|	| d S )N)rM   rN   rY   r   rP   r   ru   )r   r   r   rV   r   r   r   )
r   rM   rN   rY   r   expected_dataexpected_indexresult_framer"   result_frame_groupbyr#   r#   r$   test_data_frame_value_counts  s   
r  c                  C   s`   t j} tdd| d| ddddg	ddd| | ddddg	dddddd| d| g	ddddddd| | g	d	S )
Nr   r   r   r/   r(   rF   r3   r1   )ry   rz   r{   D)r5   r;   r   )r>   r#   r#   r$   nulls_df  s   r  z:group_dropna, count_dropna, expected_rows, expected_values)	r   r   r/   rF   r1   r   r3   r(   r   )	r   r         ?r   r   r   r   r  r  )r   r   r/   rF   r(   r   )r   r   r  r  r  r  )r   r   rF   r1   r   r3   )r   r   r   r   r   r   )r   r   rF   )r   r   r  c                    s   t tjt dkr|s|tjjddd jddg|d}|jdd|d	}t	 }j
D ]  fd
d|D | < q-t|}	t||	dd}
t||
 d S )Nr   r   Fr   ry   rz   )rO   T)rY   rM   rO   c                    r   r#   r#   r   r   r  r#   r$   r     r   z,test_dropna_combinations.<locals>.<listcomp>rL   r   )r   r5   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r  group_dropnacount_dropnar   expected_valuesr   r   r   r   r   r"   r#   r  r$   test_dropna_combinations  s   	

r	  c                 C   s    t g dg dd| | dgdS )Nr   )JohnAnner
  BethSmithLouiseru   
first_namemiddle_namer   )nulls_fixturer#   r#   r$   names_with_nulls_df  s   
r  z%dropna, expected_data, expected_indexr   )r   r   )r  r
  )r  r  r  rP   r   )r  r  r
  r  r  )r   r   r   r   )r   r   r(   r(   )r(   r   r   r(   r   c           	      C   s`   | j ||d}t|||d}|r|tt| }t|| | dj ||d}t|| d S )N)rO   rY   r   ru   )r   r   r-   r}   r   r   r   )	r  rO   rY   r   r   r   r   r"   r   r#   r#   r$   #test_data_frame_value_counts_dropna&  s   !
r  observedznormalize, name, expected_data)r(   r   r   r   r   r   r   r   r   r   r   r   r|   )r   r   r           r  r  r   r   r  r  r  r  c                 C   s   t tjt dkr|tjjddd | djd||d}|j	|d}t
jg d	g d
d}	t||	|d}
tdD ]}|
jjt|
jj| |d|
_q;|rWt||
 d S |
j|r]dndd}t|| d S )Nr   r   Fr   r   r   r   r  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rP   r   r/   levelrL   r   r   )r   r5   r   r   r   r   r   r   r   r   r   r   r   rT   r   
set_levelsr   r   r   r   r   r   r   r   r  rY   r   r   r   r   r   r   expected_seriesir"   r#   r#   r$   =test_categorical_single_grouper_with_only_observed_categoriesY  s<   



r)  c                 C   s   |   d} | d jdg| d< | jd||d}|j|d}t|tj|g dd|d}	t	d	D ] }
t
|	jj|
 }|
d
krI|| d jj}|	jj||
d|	_q2|r]t||	 d S |	j|d}t|| d S )Nr   r   ASIAr  r   r   rP   r   r/   r   r"  r$  )copyr   r   add_categoriesr   r   r   r   r   rT   r   r   r   set_categoriesr   r%  r   r   r   r   )r   r   r  r   rY   r   r   r   r   r'  r(  index_levelr"   r#   r#   r$   !assert_categorical_single_grouper  s.   
r/  c              	   C   sJ   t tjt dkr|tjjddd g d}t| |d||||d d S )Nr   r   Fr   r  Tr   r   r  r   rY   r   r   r   r5   r   r   r   r   r   r/  r   r   rY   r   r   r   r   r#   r#   r$   -test_categorical_single_grouper_observed_true  s"   

r3  )r(   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r  r  r  r   r   r  r  r  r  r  r  r  r  r  r  c              	   C   sJ   t tjt dkr|tjjddd g d}t| |d||||d d S )Nr   r   Fr   )r   r   r   r  r  r  r   r   r  r  r   r!  )r*  r   r   )r*  r   r   )r*  r   r   )r*  r   r   )r*  r   r   )r*  r   r   r0  r1  r2  r#   r#   r$   .test_categorical_single_grouper_observed_false  s"   ,

r4  zobserved, expected_index)r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   )r   r   r   )r   r   r   )r5  r6  r7  r8  r9  )r   r   r(   r   r   r   r   r   r   r   r   r   )r  r  r  r  r  r  r  r  r  r  r  r  c                 C   s   |   } | d d| d< | d d| d< | jddg||d}|j|d}t|r/||dk n|tj|g dd|d	}	td
D ]}
|	jj	t
|	jj|
 |
d|	_q@|r\t||	 d S |	j|rbdndd}t|| d S )Nr   r   r   r  r   r  )r   r   r   rP   r   r(   r"  rL   r   r$  )r+  r   r   r   r   r   r   rT   r   r%  r   r   r   r   r   r   )r   r   r  r   rY   r   r   r   r   r'  r(  r"   r#   r#   r$   "test_categorical_multiple_groupersX  s2   7


r:  c                 C   s   t tjt dkr|tjjddd |  } | d d| d< | d d| d< | j	d||d	}|j
|d
}g d}	t|tj|	g dd|d}
tddD ]}|
jjt|
jj| |d|
_qO|rkt||
 d S |
j|rqdndd}t|| d S )Nr   r   Fr   r   r   r   r   r  r   r  r   rP   r   r   r/   r"  rL   r   r$  )r   r5   r   r   r   r   r   r+  r   r   r   r   r   r   rT   r   r%  r   r   r   r   r   r   r&  r#   r#   r$   test_categorical_non_groupers  s>   


r;  z*normalize, expected_label, expected_valuesr   c                 C   s   t g dg dd}|jg dddd gdd	}|jd
| d}t dtjg dtddg ddg ddg d||i}t|| d S )Nr   r   r(   r/   )ry   rz   )r   rF   r   ry   c                 S   s   | dkrdS dS )Nr   r1   r3   r#   )r(  r#   r#   r$   r     r   z&test_mixed_groupings.<locals>.<lambda>Fr   TrM   rY   r   )r   r   rF   r  r   level_2)r3   r3   r1   rz   )r   r/   r(   )r   r   r   r5   r   intr   r   )rY   expected_labelr  r   r   r   r"   r#   r#   r$   test_mixed_groupings  s   		rB  ztest, columns, expected_namesrepeatabbde)r   Ndr   r   er#  r)   level_1)r   NrE  r   crG  c           
      C   s   t g dg dg|d}ddg}dtjddgtjd	d
g}|j||d }|r<tdtj||ddd}t	
|| d S dd |D }t|}	d|	d< |	d t ||	d}t	|| d S )N)r   r/   rF   r1   r4   )r(   r   r   r3   r&   r   )r   r   r1   r/   rF   r4   )r(   r   r3   r   r   r&   r   r   r   r  rE  r=  r  rP   r   r   c                 S   s   g | ]	}t |d g qS )r   )r9   r   r#   r#   r$   r   .  s    z0test_column_label_duplicates.<locals>.<listcomp>rG  )r   r5   r   int64r   r   r   r   r   r   r   r9   appendr   )
testr   expected_namesr   r   r   rH   r   r"   expected_columnsr#   r#   r$   test_column_label_duplicates  s(   
rN  znormalize, expected_labelc                 C   sn   t g dgdd|gdjddd}d| d}tjt|d	 |j| d
 W d    d S 1 s0w   Y  d S )Nr<  r   r   r   Fr=  zColumn label 'z' is duplicate of result columnr   r   )r   r   r   r   r   r   )rY   rA  r   r   r#   r#   r$   test_result_label_duplicates6  s   	"rO  c                  C   sf   t dddgi} | tjddgtjd}| }tdgtjddggd dgddd}t	
|| d S )Nr   r   r  r(   rP   r   r   )r   r   r5   r   rI  r   r   r   r   r   r   )r   r   r   r"   r#   r#   r$   test_ambiguous_groupingG  s   rP  c                  C   sj   t g dg ddg dd} d}tjt|d | djdgd	 W d    d S 1 s.w   Y  d S )
Nr   r   rH  r   yrS  c1c2r   r   r   r   z;Keys {'c1'} in subset cannot be in the groupby column keys.r   rU  r   r   r   r   r   r   r   r   r   r#   r#   r$   "test_subset_overlaps_gb_key_raisesR  
   "rZ  c                  C   sj   t g dg ddg dd} d}tjt|d | djd	gd
 W d    d S 1 s.w   Y  d S )NrQ  rR  rT  rW  r   z4Keys {'c3'} in subset do not exist in the DataFrame.r   rU  c3r   rX  rY  r#   r#   r$   !test_subset_doesnt_exist_in_frameZ  r[  r]  c                  C   sp   t g dg ddg dd} | jddjdgd	}td
dgtjdd
gddggd dgddd}t|| d S )NrQ  rR  rT  rW  r   r   r"  rV  r   r   r(   r   rS  rP   r   r   r   r   r   r   r   rV   r   r   r   r   r"   r#   r#   r$   test_subsetb  s   r`  c                  C   s   t g dg dg dgg dg dd} | jddjdgd	}td
dgtjdd
gddgddggg dddd}t|| d S )N)r   r   r   )r   rS  rS  rW  )rU  rV  rV  )r   r   r   r"  rV  r   r   r(   r   rS  )NrV  rV  rP   r   r   r^  r_  r#   r#   r$   test_subset_duplicate_columnsn  s   ra  c           	      C   s   t g dg dddg}t|d | ddj||d< |td	dd
}| }tg d| d|}|d  }t	||g dgg dt
dg dgg dd}td|dd}t|| d S )Nre   rf   rk   r/   rl   rn   ro   rq   rr   rs   )z
2019-08-06z
2019-08-07z
2019-08-09z
2019-08-10)rd   )rg   rh   ri   rj   )r   r   r   r(   r(   r/   r   )r   r   r   r(   r(   r/   )rq   rl   rm   r   r   r   r   )r   rv   r
   dtas_unitr   r   r   uniquer   rT   r   r   r   )	rd   rp   r   r   r   dates
timestampsr   r"   r#   r#   r$   test_value_counts_time_grouper  s.   	rg  c                  C   sj   t g dg dg dd} | jddgddd}|d	  }t g dg dg ddd
}t|| d S )N)r   r   r   )r   r   rE  rQ  r<  r   r(   Fr   r/   )r   r(   r/   r   )r   r   r   r   r   )r   r   r   r"   r#   r#   r$   !test_value_counts_integer_columns  s   rh  vc_sortc           
      C   s   t g dg dd}|jd| d}|j||d}|r g d}ng d}td	d
gddggg dg dgddgd}t|||r@dndd}| rM|rMg d}	n| rV|sVg d}	n| s_|r_g d}	ng d}	||	}t|| d S )Nr(   r   r   r   r/   r   r/   r/   r   r   r   rZ   r>  )UUUUUU?UUUUUU?r  r   r   r(   r/   r   )r   r   r   )r   r   r   r   r   rL   r   r   )r   r   r(   )r   r(   r   )r(   r   r   )r   r   r   r   r   taker   r   )
rM   ri  rY   r   r   r   r   r   r"   takerr#   r#   r$   test_value_counts_sort  s&   
"



rq  c           
      C   s   t g dg dddd}|jd| dd}|j||d	}|r#g d
}ng d}|r+dnd}t dtg ddtg d||iddg| }| rO|rOg d}	n| rX|sXg d}	n| sa|rag d}	ng d}	||	}t|| d S )Nrj  rk  rl  r   r  r   T)rM   r  r>  )rm  rn  r  r  )r(   r   r   r   rL   r   )r   r   r(   r(   r   )r/   r   r/   r   )r   r   r(   r/   )r   r(   r   r/   )r(   r/   r   r   )r   r   r   r   	set_indexro  r   r   )
rM   ri  rY   r   r   r   r   r   r"   rp  r#   r#   r$   "test_value_counts_sort_categorical  s0   




rs  )D__doc__numpyr5   r   pandasr   r   r   r   r   r   r   r	   r
   pandas._testing_testingr   pandas.util.versionr   r%   rB   r   slowparametrizereprrc   rx   r   r   r   r   fixturer   r   r   r   r   r   r   r   r  r  r	  r  rV   r;   r  r   rI  r)  r/  r3  r4  r:  r;  rB  r9   rN  rO  rP  rZ  r]  r`  ra  rg  rh  rq  rs  r#   r#   r#   r$   <module>   s   ,*




 \*





	
:#*
&0 %<



!