o
    cx@                  	   @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddl	Z
ddlZ
ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZ ddl m!Z!m"Z" dd	l#m$Z$ dd
l%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5m6Z6 erddlm7Z7 ne8Z7e9e:Z;ee<e<f Z=de<dee< fddZ>G dd de?Z@deddfddZAG dd de?ZBde<de*ddfd d!ZCde<de*defd"d#ZDd$e=dee< fd%d&ZEG d'd( d(ZFG d)d* d*e7ZGd+eGdeGfd,d-ZHeHd.d/dee& fd0d1ZIG d2d/ d/ZJG d3d4 d4eZK	dEd5e&d6ee<e?f d7eed8  ddfd9d:ZL	;dFded<eMdeJfd=d>ZNd5e&de*ded/ fd?d@ZOG dAdB dBeZPG dCdD dDZQdS )GzO
The main purpose of this module is to expose LinkCollector.collect_sources().
    N)
HTMLParser)Values)TYPE_CHECKINGCallableDictIterableListMutableMapping
NamedTupleOptionalSequenceTupleUnion)requests)Response)
RetryErrorSSLError)NetworkConnectionError)Link)SearchScope)
PipSession)raise_for_status)is_archive_fileredact_auth_from_url)vcs   )CandidatesFromPage
LinkSourcebuild_source)Protocolurlreturnc                 C   s6   t jD ]}|  |r| t| dv r|  S qdS )zgLook for VCS schemes in the URL.

    Returns the matched VCS scheme, or None if there's no match.
    z+:N)r   schemeslower
startswithlen)r!   scheme r(   J/opt/certbot/lib/python3.10/site-packages/pip/_internal/index/collector.py_match_vcs_scheme7   s
   
r*   c                       s*   e Zd Zdededdf fddZ  ZS )_NotAPIContentcontent_typerequest_descr"   Nc                    s   t  || || _|| _d S N)super__init__r,   r-   )selfr,   r-   	__class__r(   r)   r0   C   s   
z_NotAPIContent.__init__)__name__
__module____qualname__strr0   __classcell__r(   r(   r2   r)   r+   B   s    "r+   responsec                 C   s2   | j dd}| }|drdS t|| jj)z
    Check the Content-Type header to ensure the response contains a Simple
    API Response.

    Raises `_NotAPIContent` if the content type is not a valid content-type.
    Content-TypeUnknown)z	text/htmlz#application/vnd.pypi.simple.v1+html#application/vnd.pypi.simple.v1+jsonN)headersgetr$   r%   r+   requestmethod)r9   r,   content_type_lr(   r(   r)   _ensure_api_headerI   s   rB   c                   @   s   e Zd ZdS )_NotHTTPN)r4   r5   r6   r(   r(   r(   r)   rC   _   s    rC   sessionc                 C   sF   t j| \}}}}}|dvrt |j| dd}t| t| dS )z
    Send a HEAD request to the URL, and ensure the response contains a simple
    API Response.

    Raises `_NotHTTP` if the URL is not available for a HEAD request, or
    `_NotAPIContent` if the content type is not a valid content type.
    >   httphttpsT)allow_redirectsN)urllibparseurlsplitrC   headr   rB   )r!   rD   r'   netlocpathqueryfragmentrespr(   r(   r)   _ensure_api_responsec   s   rQ   c                 C   sx   t t| jrt| |d tdt|  |j| dg dddd}t	| t
| tdt| |jd	d
 |S )aY  Access an Simple API response with GET, and return the response.

    This consists of three parts:

    1. If the URL looks suspiciously like an archive, send a HEAD first to
       check the Content-Type is HTML or Simple API, to avoid downloading a
       large file. Raise `_NotHTTP` if the content type cannot be determined, or
       `_NotAPIContent` if it is not HTML or a Simple API.
    2. Actually perform the request. Raise HTTP exceptions on network failures.
    3. Check the Content-Type header to make sure we got a Simple API response,
       and raise `_NotAPIContent` otherwise.
    rD   zGetting page %sz, )r<   z*application/vnd.pypi.simple.v1+html; q=0.1ztext/html; q=0.01z	max-age=0)AcceptzCache-Control)r=   zFetched page %s as %sr:   r;   )r   r   filenamerQ   loggerdebugr   r>   joinr   rB   r=   )r!   rD   rP   r(   r(   r)   _get_simple_responseu   s&   rX   r=   c                 C   s<   | rd| v rt j }| d |d< |d}|rt|S dS )z=Determine if we have any encoding information in our headers.r:   zcontent-typecharsetN)emailmessageMessage	get_paramr7   )r=   mrY   r(   r(   r)   _get_encoding_from_headers   s   

r_   c                   @   s6   e Zd ZdddZdedefdd	Zdefd
dZdS )CacheablePageContentpageIndexContentr"   Nc                 C   s   |j sJ || _d S r.   )cache_link_parsingra   r1   ra   r(   r(   r)   r0      s   

zCacheablePageContent.__init__otherc                 C   s   t |t| o| jj|jjkS r.   )
isinstancetypera   r!   )r1   re   r(   r(   r)   __eq__   s   zCacheablePageContent.__eq__c                 C   s   t | jjS r.   )hashra   r!   r1   r(   r(   r)   __hash__   s   zCacheablePageContent.__hash__)ra   rb   r"   N)	r4   r5   r6   r0   objectboolrh   intrk   r(   r(   r(   r)   r`      s    
r`   c                   @   s"   e Zd Zdddee fddZdS )
ParseLinksra   rb   r"   c                 C   s   d S r.   r(   rd   r(   r(   r)   __call__   s   zParseLinks.__call__N)r4   r5   r6   r   r   rp   r(   r(   r(   r)   ro      s    ro   fnc                    sP   t jdddtdtt f fddt  dddtt f fd	d
}|S )z
    Given a function that parses an Iterable[Link] from an IndexContent, cache the
    function's result (keyed by CacheablePageContent), unless the IndexContent
    `page` has `page.cache_link_parsing == False`.
    N)maxsizecacheable_pager"   c                    s   t  | jS r.   )listra   )rs   )rq   r(   r)   wrapper   s   z*with_cached_index_content.<locals>.wrapperra   rb   c                    s   | j r	t| S t | S r.   )rc   r`   rt   )ra   rq   ru   r(   r)   wrapper_wrapper   s   z2with_cached_index_content.<locals>.wrapper_wrapper)	functools	lru_cacher`   r   r   wraps)rq   rw   r(   rv   r)   with_cached_index_content   s
   
r{   ra   rb   c           
      c   s    | j  }|dr+t| j}|dg D ]}t|| j	}|du r%q|V  qdS t
| j	}| jp4d}|| j| | j	}|jpE|}|jD ]}	tj|	||d}|du rXqI|V  qIdS )z\
    Parse a Simple API's Index Content, and yield its anchor elements as Link objects.
    r<   filesNzutf-8)page_urlbase_url)r,   r$   r%   jsonloadscontentr>   r   	from_jsonr!   HTMLLinkParserencodingfeeddecoder~   anchorsfrom_element)
ra   rA   datafilelinkparserr   r!   r~   anchorr(   r(   r)   parse_links   s*   





r   c                   @   sH   e Zd ZdZ	ddededee dededd	fd
dZdefddZ	d	S )rb   z5Represents one response (or page), along with its URLTr   r,   r   r!   rc   r"   Nc                 C   s"   || _ || _|| _|| _|| _dS )am  
        :param encoding: the encoding to decode the given content.
        :param url: the URL from which the HTML was downloaded.
        :param cache_link_parsing: whether links parsed from this page's url
                                   should be cached. PyPI index urls should
                                   have this set to False, for example.
        N)r   r,   r   r!   rc   )r1   r   r,   r   r!   rc   r(   r(   r)   r0     s
   
zIndexContent.__init__c                 C   s
   t | jS r.   )r   r!   rj   r(   r(   r)   __str__  s   
zIndexContent.__str__T)
r4   r5   r6   __doc__bytesr7   r   rm   r0   r   r(   r(   r(   r)   rb     s"    
c                       sv   e Zd ZdZdeddf fddZdedeeeee f  ddfd	d
Z	deeeee f  dee fddZ
  ZS )r   zf
    HTMLParser that keeps the first base HREF and a list of all anchor
    elements' attributes.
    r!   r"   Nc                    s$   t  jdd || _d | _g | _d S )NT)convert_charrefs)r/   r0   r!   r~   r   )r1   r!   r2   r(   r)   r0   #  s   
zHTMLLinkParser.__init__tagattrsc                 C   sR   |dkr| j d u r| |}|d ur|| _ d S d S |dkr'| jt| d S d S )Nbasea)r~   get_hrefr   appenddict)r1   r   r   hrefr(   r(   r)   handle_starttag*  s   

zHTMLLinkParser.handle_starttagc                 C   s"   |D ]\}}|dkr|  S qd S )Nr   r(   )r1   r   namevaluer(   r(   r)   r   2  s
   zHTMLLinkParser.get_href)r4   r5   r6   r   r7   r0   r   r   r   r   r   r8   r(   r(   r2   r)   r     s
    &.r   r   reasonmeth).Nc                 C   s   |d u rt j}|d| | d S )Nz%Could not fetch URL %s: %s - skipping)rU   rV   )r   r   r   r(   r(   r)   _handle_get_simple_fail9  s   r   Trc   c                 C   s&   t | j}t| j| jd || j|dS )Nr:   )r   r!   rc   )r_   r=   rb   r   r!   )r9   rc   r   r(   r(   r)   _make_index_contentC  s   
r   c          
   
   C   s  | j ddd }t|}|rtd||  d S tj|\}}}}}}|dkrHtj	
tj|rH|ds;|d7 }tj|d}td| zt||d	}W n ty`   td
|  Y d S  ty| } ztd| |j|j W Y d }~d S d }~w ty } zt| | W Y d }~d S d }~w ty } zt| | W Y d }~d S d }~w ty } zd}	|	t|7 }	t| |	tjd W Y d }~d S d }~w tjy } zt| d|  W Y d }~d S d }~w tjy   t| d Y d S w t|| jdS )N#r   r   zICannot look at %s URL %s because it does not support lookup as web pages.r   /z
index.htmlz# file: URL is directory, getting %srR   z`Skipping page %s because it looks like an archive, and cannot be checked by a HTTP HEAD request.zSkipping page %s because the %s request got Content-Type: %s. The only supported Content-Types are application/vnd.pypi.simple.v1+json, application/vnd.pypi.simple.v1+html, and text/htmlz4There was a problem confirming the ssl certificate: )r   zconnection error: z	timed out)rc   ) r!   splitr*   rU   warningrH   rI   urlparseosrM   isdirr?   url2pathnameendswithurljoinrV   rX   rC   r+   r-   r,   r   r   r   r   r7   infor   ConnectionErrorTimeoutr   rc   )
r   rD   r!   
vcs_schemer'   _rM   rP   excr   r(   r(   r)   _get_index_contentP  sn   
r   c                   @   s.   e Zd ZU eee  ed< eee  ed< dS )CollectedSources
find_links
index_urlsN)r4   r5   r6   r   r   r   __annotations__r(   r(   r(   r)   r     s   
 r   c                
   @   s   e Zd ZdZdededdfddZe	dded	ed
e	dd fddZ
edee fddZdedee fddZdededefddZdS )LinkCollectorz
    Responsible for collecting Link objects from all configured locations,
    making network requests as needed.

    The class's main method is its collect_sources() method.
    rD   search_scoper"   Nc                 C   s   || _ || _d S r.   )r   rD   )r1   rD   r   r(   r(   r)   r0     s   
zLinkCollector.__init__Foptionssuppress_no_indexc                 C   sd   |j g|j }|jr|stdddd |D  g }|jp g }tj|||jd}t	||d}|S )z
        :param session: The Session to use to make requests.
        :param suppress_no_index: Whether to ignore the --no-index option
            when constructing the SearchScope object.
        zIgnoring indexes: %s,c                 s   s    | ]}t |V  qd S r.   r   ).0r!   r(   r(   r)   	<genexpr>  s    z'LinkCollector.create.<locals>.<genexpr>)r   r   no_index)rD   r   )
	index_urlextra_index_urlsr   rU   rV   rW   r   r   creater   )clsrD   r   r   r   r   r   link_collectorr(   r(   r)   r     s$   

zLinkCollector.createc                 C   s   | j jS r.   )r   r   rj   r(   r(   r)   r     s   zLinkCollector.find_linkslocationc                 C   s   t || jdS )z>
        Fetch an HTML page containing package links.
        rR   )r   rD   )r1   r   r(   r(   r)   fetch_response  s   zLinkCollector.fetch_responseproject_namecandidates_from_pagec                    s   t  fddj|D  }t  fddjD  }ttj	rIdd t
||D }t| d| dg| }td| tt|t|d	S )
Nc                 3   &    | ]}t | jjd d dV  qdS )Fr   page_validator
expand_dirrc   Nr   rD   is_secure_originr   locr   r1   r(   r)   r         
z0LinkCollector.collect_sources.<locals>.<genexpr>c                 3   r   )Tr   Nr   r   r   r(   r)   r     r   c                 S   s*   g | ]}|d ur|j d urd|j  qS )Nz* )r   )r   sr(   r(   r)   
<listcomp>  s    
z1LinkCollector.collect_sources.<locals>.<listcomp>z' location(s) to search for versions of :
)r   r   )collectionsOrderedDictr   get_index_urls_locationsvaluesr   rU   isEnabledForloggingDEBUG	itertoolschainr&   rV   rW   r   rt   )r1   r   r   index_url_sourcesfind_links_sourceslinesr(   r   r)   collect_sources  s2   
	
	

zLinkCollector.collect_sources)F)r4   r5   r6   r   r   r   r0   classmethodr   rm   r   propertyr   r7   r   r   r   rb   r   r   r   r   r(   r(   r(   r)   r     s<    
!r   r.   r   )Rr   r   email.messagerZ   rx   r   r   r   r   urllib.parserH   urllib.requesthtml.parserr   optparser   typingr   r   r   r   r   r	   r
   r   r   r   r   pip._vendorr   pip._vendor.requestsr   pip._vendor.requests.exceptionsr   r   pip._internal.exceptionsr   pip._internal.models.linkr   !pip._internal.models.search_scoper   pip._internal.network.sessionr   pip._internal.network.utilsr   pip._internal.utils.filetypesr   pip._internal.utils.miscr   pip._internal.vcsr   sourcesr   r   r   r    rl   	getLoggerr4   rU   r7   ResponseHeadersr*   	Exceptionr+   rB   rC   rQ   rX   r_   r`   ro   r{   r   rb   r   r   rm   r   r   r   r   r(   r(   r(   r)   <module>   s    4
?



=