Updated script so it can be controlled by a Node.js web app
@@ -0,0 +1,28 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0

"""CacheControl import Interface.

Make it easy to import from cachecontrol without long namespaces.
"""
__author__ = "Eric Larson"
__email__ = "eric@ionrock.org"
__version__ = "0.14.0"

from pip._vendor.cachecontrol.adapter import CacheControlAdapter
from pip._vendor.cachecontrol.controller import CacheController
from pip._vendor.cachecontrol.wrapper import CacheControl

__all__ = [
    "__author__",
    "__email__",
    "__version__",
    "CacheControlAdapter",
    "CacheController",
    "CacheControl",
]

import logging

logging.getLogger(__name__).addHandler(logging.NullHandler())
Binary file not shown.
@@ -0,0 +1,70 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

import logging
from argparse import ArgumentParser
from typing import TYPE_CHECKING

from pip._vendor import requests

from pip._vendor.cachecontrol.adapter import CacheControlAdapter
from pip._vendor.cachecontrol.cache import DictCache
from pip._vendor.cachecontrol.controller import logger

if TYPE_CHECKING:
    from argparse import Namespace

    from pip._vendor.cachecontrol.controller import CacheController


def setup_logging() -> None:
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    logger.addHandler(handler)


def get_session() -> requests.Session:
    adapter = CacheControlAdapter(
        DictCache(), cache_etags=True, serializer=None, heuristic=None
    )
    sess = requests.Session()
    sess.mount("http://", adapter)
    sess.mount("https://", adapter)

    sess.cache_controller = adapter.controller  # type: ignore[attr-defined]
    return sess


def get_args() -> Namespace:
    parser = ArgumentParser()
    parser.add_argument("url", help="The URL to try and cache")
    return parser.parse_args()


def main() -> None:
    args = get_args()
    sess = get_session()

    # Make a request to get a response
    resp = sess.get(args.url)

    # Turn on logging
    setup_logging()

    # try setting the cache
    cache_controller: CacheController = (
        sess.cache_controller  # type: ignore[attr-defined]
    )
    cache_controller.cache_response(resp.request, resp.raw)

    # Now try to get it
    if cache_controller.cached_request(resp.request):
        print("Cached!")
    else:
        print("Not cached :(")


if __name__ == "__main__":
    main()
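The helper above is effectively a usage example for the rest of the package. A minimal sketch of driving it programmatically rather than via argparse (assumes the vendored import path is importable from your environment; the URL is a placeholder):

from pip._vendor.cachecontrol._cmd import get_session

sess = get_session()                    # Session backed by an in-memory DictCache
resp = sess.get("https://example.com")  # first fetch goes to the network
print(resp.from_cache)                  # attribute added by CacheControlAdapter.build_response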
161 lib/python3.13/site-packages/pip/_vendor/cachecontrol/adapter.py Normal file
@@ -0,0 +1,161 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

import functools
import types
import zlib
from typing import TYPE_CHECKING, Any, Collection, Mapping

from pip._vendor.requests.adapters import HTTPAdapter

from pip._vendor.cachecontrol.cache import DictCache
from pip._vendor.cachecontrol.controller import PERMANENT_REDIRECT_STATUSES, CacheController
from pip._vendor.cachecontrol.filewrapper import CallbackFileWrapper

if TYPE_CHECKING:
    from pip._vendor.requests import PreparedRequest, Response
    from pip._vendor.urllib3 import HTTPResponse

    from pip._vendor.cachecontrol.cache import BaseCache
    from pip._vendor.cachecontrol.heuristics import BaseHeuristic
    from pip._vendor.cachecontrol.serialize import Serializer


class CacheControlAdapter(HTTPAdapter):
    invalidating_methods = {"PUT", "PATCH", "DELETE"}

    def __init__(
        self,
        cache: BaseCache | None = None,
        cache_etags: bool = True,
        controller_class: type[CacheController] | None = None,
        serializer: Serializer | None = None,
        heuristic: BaseHeuristic | None = None,
        cacheable_methods: Collection[str] | None = None,
        *args: Any,
        **kw: Any,
    ) -> None:
        super().__init__(*args, **kw)
        self.cache = DictCache() if cache is None else cache
        self.heuristic = heuristic
        self.cacheable_methods = cacheable_methods or ("GET",)

        controller_factory = controller_class or CacheController
        self.controller = controller_factory(
            self.cache, cache_etags=cache_etags, serializer=serializer
        )

    def send(
        self,
        request: PreparedRequest,
        stream: bool = False,
        timeout: None | float | tuple[float, float] | tuple[float, None] = None,
        verify: bool | str = True,
        cert: (None | bytes | str | tuple[bytes | str, bytes | str]) = None,
        proxies: Mapping[str, str] | None = None,
        cacheable_methods: Collection[str] | None = None,
    ) -> Response:
        """
        Send a request. Use the request information to see if it
        exists in the cache and cache the response if we need to and can.
        """
        cacheable = cacheable_methods or self.cacheable_methods
        if request.method in cacheable:
            try:
                cached_response = self.controller.cached_request(request)
            except zlib.error:
                cached_response = None
            if cached_response:
                return self.build_response(request, cached_response, from_cache=True)

            # check for etags and add headers if appropriate
            request.headers.update(self.controller.conditional_headers(request))

        resp = super().send(request, stream, timeout, verify, cert, proxies)

        return resp

    def build_response(
        self,
        request: PreparedRequest,
        response: HTTPResponse,
        from_cache: bool = False,
        cacheable_methods: Collection[str] | None = None,
    ) -> Response:
        """
        Build a response by making a request or using the cache.

        This will end up calling send and returning a potentially
        cached response
        """
        cacheable = cacheable_methods or self.cacheable_methods
        if not from_cache and request.method in cacheable:
            # Check for any heuristics that might update headers
            # before trying to cache.
            if self.heuristic:
                response = self.heuristic.apply(response)

            # apply any expiration heuristics
            if response.status == 304:
                # We must have sent an ETag request. This could mean
                # that we've been expired already or that we simply
                # have an etag. In either case, we want to try and
                # update the cache if that is the case.
                cached_response = self.controller.update_cached_response(
                    request, response
                )

                if cached_response is not response:
                    from_cache = True

                # We are done with the server response, read a
                # possible response body (compliant servers will
                # not return one, but we cannot be 100% sure) and
                # release the connection back to the pool.
                response.read(decode_content=False)
                response.release_conn()

                response = cached_response

            # We always cache the 301 responses
            elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
                self.controller.cache_response(request, response)
            else:
                # Wrap the response file with a wrapper that will cache the
                # response when the stream has been consumed.
                response._fp = CallbackFileWrapper(  # type: ignore[assignment]
                    response._fp,  # type: ignore[arg-type]
                    functools.partial(
                        self.controller.cache_response, request, response
                    ),
                )
                if response.chunked:
                    super_update_chunk_length = response._update_chunk_length

                    def _update_chunk_length(self: HTTPResponse) -> None:
                        super_update_chunk_length()
                        if self.chunk_left == 0:
                            self._fp._close()  # type: ignore[union-attr]

                    response._update_chunk_length = types.MethodType(  # type: ignore[method-assign]
                        _update_chunk_length, response
                    )

        resp: Response = super().build_response(request, response)  # type: ignore[no-untyped-call]

        # See if we should invalidate the cache.
        if request.method in self.invalidating_methods and resp.ok:
            assert request.url is not None
            cache_url = self.controller.cache_url(request.url)
            self.cache.delete(cache_url)

        # Give the request a from_cache attr to let people use it
        resp.from_cache = from_cache  # type: ignore[attr-defined]

        return resp

    def close(self) -> None:
        self.cache.close()
        super().close()  # type: ignore[no-untyped-call]
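For context, mounting this adapter by hand looks like the sketch below; the wrapper.py added later in this commit does exactly this for you. Defaults follow the constructor above (in-memory DictCache, GET-only caching):

from pip._vendor import requests
from pip._vendor.cachecontrol.adapter import CacheControlAdapter

sess = requests.Session()
adapter = CacheControlAdapter()   # DictCache, cache_etags=True, caches GET only
sess.mount("http://", adapter)    # route both schemes through the caching adapter
sess.mount("https://", adapter)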
@@ -0,0 +1,74 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0

"""
The cache object API for implementing caches. The default is a thread
safe in-memory dictionary.
"""
from __future__ import annotations

from threading import Lock
from typing import IO, TYPE_CHECKING, MutableMapping

if TYPE_CHECKING:
    from datetime import datetime


class BaseCache:
    def get(self, key: str) -> bytes | None:
        raise NotImplementedError()

    def set(
        self, key: str, value: bytes, expires: int | datetime | None = None
    ) -> None:
        raise NotImplementedError()

    def delete(self, key: str) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        pass


class DictCache(BaseCache):
    def __init__(self, init_dict: MutableMapping[str, bytes] | None = None) -> None:
        self.lock = Lock()
        self.data = init_dict or {}

    def get(self, key: str) -> bytes | None:
        return self.data.get(key, None)

    def set(
        self, key: str, value: bytes, expires: int | datetime | None = None
    ) -> None:
        with self.lock:
            self.data.update({key: value})

    def delete(self, key: str) -> None:
        with self.lock:
            if key in self.data:
                self.data.pop(key)


class SeparateBodyBaseCache(BaseCache):
    """
    In this variant, the body is not stored mixed in with the metadata, but is
    passed in (as a bytes-like object) in a separate call to ``set_body()``.

    That is, the expected interaction pattern is::

        cache.set(key, serialized_metadata)
        cache.set_body(key)

    Similarly, the body should be loaded separately via ``get_body()``.
    """

    def set_body(self, key: str, body: bytes) -> None:
        raise NotImplementedError()

    def get_body(self, key: str) -> IO[bytes] | None:
        """
        Return the body as a file-like object.
        """
        raise NotImplementedError()
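Implementing a new backend only requires the methods above. A minimal sketch (NullCache is a hypothetical example, not part of the package): it satisfies the BaseCache contract while storing nothing, which effectively disables caching without changing call sites:

from pip._vendor.cachecontrol.cache import BaseCache


class NullCache(BaseCache):
    def get(self, key):
        return None   # nothing is ever stored, so nothing is found

    def set(self, key, value, expires=None):
        pass          # drop writes on the floor

    def delete(self, key):
        pass          # nothing to delete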
@@ -0,0 +1,8 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0

from pip._vendor.cachecontrol.caches.file_cache import FileCache, SeparateBodyFileCache
from pip._vendor.cachecontrol.caches.redis_cache import RedisCache

__all__ = ["FileCache", "SeparateBodyFileCache", "RedisCache"]
Binary file not shown.
@@ -0,0 +1,182 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

import hashlib
import os
from textwrap import dedent
from typing import IO, TYPE_CHECKING, Union
from pathlib import Path

from pip._vendor.cachecontrol.cache import BaseCache, SeparateBodyBaseCache
from pip._vendor.cachecontrol.controller import CacheController

if TYPE_CHECKING:
    from datetime import datetime

    from filelock import BaseFileLock


def _secure_open_write(filename: str, fmode: int) -> IO[bytes]:
    # We only want to write to this file, so open it in write only mode
    flags = os.O_WRONLY

    # os.O_CREAT | os.O_EXCL will fail if the file already exists, so we will
    # only open *new* files.
    # We specify this because we want to ensure that the mode we pass is the
    # mode of the file.
    flags |= os.O_CREAT | os.O_EXCL

    # Do not follow symlinks to prevent someone from making a symlink that
    # we follow and insecurely open a cache file.
    if hasattr(os, "O_NOFOLLOW"):
        flags |= os.O_NOFOLLOW

    # On Windows we'll mark this file as binary
    if hasattr(os, "O_BINARY"):
        flags |= os.O_BINARY

    # Before we open our file, we want to delete any existing file that is
    # there
    try:
        os.remove(filename)
    except OSError:
        # The file must not exist already, so we can just skip ahead to opening
        pass

    # Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a
    # race condition happens between the os.remove and this line, that an
    # error will be raised. Because we utilize a lockfile this should only
    # happen if someone is attempting to attack us.
    fd = os.open(filename, flags, fmode)
    try:
        return os.fdopen(fd, "wb")

    except:
        # An error occurred wrapping our FD in a file object
        os.close(fd)
        raise


class _FileCacheMixin:
    """Shared implementation for both FileCache variants."""

    def __init__(
        self,
        directory: str | Path,
        forever: bool = False,
        filemode: int = 0o0600,
        dirmode: int = 0o0700,
        lock_class: type[BaseFileLock] | None = None,
    ) -> None:
        try:
            if lock_class is None:
                from filelock import FileLock

                lock_class = FileLock
        except ImportError:
            notice = dedent(
                """
            NOTE: In order to use the FileCache you must have
            filelock installed. You can install it via pip:
              pip install cachecontrol[filecache]
            """
            )
            raise ImportError(notice)

        self.directory = directory
        self.forever = forever
        self.filemode = filemode
        self.dirmode = dirmode
        self.lock_class = lock_class

    @staticmethod
    def encode(x: str) -> str:
        return hashlib.sha224(x.encode()).hexdigest()

    def _fn(self, name: str) -> str:
        # NOTE: This method should not change as some may depend on it.
        #       See: https://github.com/ionrock/cachecontrol/issues/63
        hashed = self.encode(name)
        parts = list(hashed[:5]) + [hashed]
        return os.path.join(self.directory, *parts)

    def get(self, key: str) -> bytes | None:
        name = self._fn(key)
        try:
            with open(name, "rb") as fh:
                return fh.read()

        except FileNotFoundError:
            return None

    def set(
        self, key: str, value: bytes, expires: int | datetime | None = None
    ) -> None:
        name = self._fn(key)
        self._write(name, value)

    def _write(self, path: str, data: bytes) -> None:
        """
        Safely write the data to the given path.
        """
        # Make sure the directory exists
        try:
            os.makedirs(os.path.dirname(path), self.dirmode)
        except OSError:
            pass

        with self.lock_class(path + ".lock"):
            # Write our actual file
            with _secure_open_write(path, self.filemode) as fh:
                fh.write(data)

    def _delete(self, key: str, suffix: str) -> None:
        name = self._fn(key) + suffix
        if not self.forever:
            try:
                os.remove(name)
            except FileNotFoundError:
                pass


class FileCache(_FileCacheMixin, BaseCache):
    """
    Traditional FileCache: body is stored in memory, so not suitable for large
    downloads.
    """

    def delete(self, key: str) -> None:
        self._delete(key, "")


class SeparateBodyFileCache(_FileCacheMixin, SeparateBodyBaseCache):
    """
    Memory-efficient FileCache: body is stored in a separate file, reducing
    peak memory usage.
    """

    def get_body(self, key: str) -> IO[bytes] | None:
        name = self._fn(key) + ".body"
        try:
            return open(name, "rb")
        except FileNotFoundError:
            return None

    def set_body(self, key: str, body: bytes) -> None:
        name = self._fn(key) + ".body"
        self._write(name, body)

    def delete(self, key: str) -> None:
        self._delete(key, "")
        self._delete(key, ".body")


def url_to_file_path(url: str, filecache: FileCache) -> str:
    """Return the file cache path based on the URL.

    This does not ensure the file exists!
    """
    key = CacheController.cache_url(url)
    return filecache._fn(key)
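A hedged usage sketch for the disk-backed variants (requires the external filelock package, as the constructor enforces; ".webcache" is a placeholder directory):

from pip._vendor import requests
from pip._vendor.cachecontrol.adapter import CacheControlAdapter
from pip._vendor.cachecontrol.caches.file_cache import FileCache

adapter = CacheControlAdapter(cache=FileCache(".webcache"))
sess = requests.Session()
sess.mount("https://", adapter)   # cached responses now persist across processes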
@@ -0,0 +1,48 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations


from datetime import datetime, timezone
from typing import TYPE_CHECKING

from pip._vendor.cachecontrol.cache import BaseCache

if TYPE_CHECKING:
    from redis import Redis


class RedisCache(BaseCache):
    def __init__(self, conn: Redis[bytes]) -> None:
        self.conn = conn

    def get(self, key: str) -> bytes | None:
        return self.conn.get(key)

    def set(
        self, key: str, value: bytes, expires: int | datetime | None = None
    ) -> None:
        if not expires:
            self.conn.set(key, value)
        elif isinstance(expires, datetime):
            now_utc = datetime.now(timezone.utc)
            if expires.tzinfo is None:
                now_utc = now_utc.replace(tzinfo=None)
            delta = expires - now_utc
            self.conn.setex(key, int(delta.total_seconds()), value)
        else:
            self.conn.setex(key, expires, value)

    def delete(self, key: str) -> None:
        self.conn.delete(key)

    def clear(self) -> None:
        """Helper for clearing all the keys in a database. Use with
        caution!"""
        for key in self.conn.keys():
            self.conn.delete(key)

    def close(self) -> None:
        """Redis uses connection pooling, no need to close the connection."""
        pass
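A sketch of pairing this with a live Redis connection (assumes the external redis package, which pip does not vendor; host and port are placeholders):

import redis

from pip._vendor.cachecontrol.caches.redis_cache import RedisCache

cache = RedisCache(redis.Redis(host="localhost", port=6379))
cache.set("example-key", b"payload", expires=300)  # int expires -> SETEX, 300 s TTL
print(cache.get("example-key"))                    # b'payload' until the TTL lapses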
@@ -0,0 +1,499 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0

"""
The httplib2 algorithms ported for use with requests.
"""
from __future__ import annotations

import calendar
import logging
import re
import time
from email.utils import parsedate_tz
from typing import TYPE_CHECKING, Collection, Mapping

from pip._vendor.requests.structures import CaseInsensitiveDict

from pip._vendor.cachecontrol.cache import DictCache, SeparateBodyBaseCache
from pip._vendor.cachecontrol.serialize import Serializer

if TYPE_CHECKING:
    from typing import Literal

    from pip._vendor.requests import PreparedRequest
    from pip._vendor.urllib3 import HTTPResponse

    from pip._vendor.cachecontrol.cache import BaseCache

logger = logging.getLogger(__name__)

URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

PERMANENT_REDIRECT_STATUSES = (301, 308)


def parse_uri(uri: str) -> tuple[str, str, str, str, str]:
    """Parses a URI using the regex given in Appendix B of RFC 3986.

    (scheme, authority, path, query, fragment) = parse_uri(uri)
    """
    match = URI.match(uri)
    assert match is not None
    groups = match.groups()
    return (groups[1], groups[3], groups[4], groups[6], groups[8])


class CacheController:
    """An interface to see if a request should be cached or not."""

    def __init__(
        self,
        cache: BaseCache | None = None,
        cache_etags: bool = True,
        serializer: Serializer | None = None,
        status_codes: Collection[int] | None = None,
    ):
        self.cache = DictCache() if cache is None else cache
        self.cache_etags = cache_etags
        self.serializer = serializer or Serializer()
        self.cacheable_status_codes = status_codes or (200, 203, 300, 301, 308)

    @classmethod
    def _urlnorm(cls, uri: str) -> str:
        """Normalize the URL to create a safe key for the cache"""
        (scheme, authority, path, query, fragment) = parse_uri(uri)
        if not scheme or not authority:
            raise Exception("Only absolute URIs are allowed. uri = %s" % uri)

        scheme = scheme.lower()
        authority = authority.lower()

        if not path:
            path = "/"

        # Could do syntax based normalization of the URI before
        # computing the digest. See Section 6.2.2 of Std 66.
        request_uri = query and "?".join([path, query]) or path
        defrag_uri = scheme + "://" + authority + request_uri

        return defrag_uri

    @classmethod
    def cache_url(cls, uri: str) -> str:
        return cls._urlnorm(uri)

    def parse_cache_control(self, headers: Mapping[str, str]) -> dict[str, int | None]:
        known_directives = {
            # https://tools.ietf.org/html/rfc7234#section-5.2
            "max-age": (int, True),
            "max-stale": (int, False),
            "min-fresh": (int, True),
            "no-cache": (None, False),
            "no-store": (None, False),
            "no-transform": (None, False),
            "only-if-cached": (None, False),
            "must-revalidate": (None, False),
            "public": (None, False),
            "private": (None, False),
            "proxy-revalidate": (None, False),
            "s-maxage": (int, True),
        }

        cc_headers = headers.get("cache-control", headers.get("Cache-Control", ""))

        retval: dict[str, int | None] = {}

        for cc_directive in cc_headers.split(","):
            if not cc_directive.strip():
                continue

            parts = cc_directive.split("=", 1)
            directive = parts[0].strip()

            try:
                typ, required = known_directives[directive]
            except KeyError:
                logger.debug("Ignoring unknown cache-control directive: %s", directive)
                continue

            if not typ or not required:
                retval[directive] = None
            if typ:
                try:
                    retval[directive] = typ(parts[1].strip())
                except IndexError:
                    if required:
                        logger.debug(
                            "Missing value for cache-control directive: %s",
                            directive,
                        )
                except ValueError:
                    logger.debug(
                        "Invalid value for cache-control directive %s, must be %s",
                        directive,
                        typ.__name__,
                    )

        return retval

    def _load_from_cache(self, request: PreparedRequest) -> HTTPResponse | None:
        """
        Load a cached response, or return None if it's not available.
        """
        # We do not support caching of partial content: so if the request contains a
        # Range header then we don't want to load anything from the cache.
        if "Range" in request.headers:
            return None

        cache_url = request.url
        assert cache_url is not None
        cache_data = self.cache.get(cache_url)
        if cache_data is None:
            logger.debug("No cache entry available")
            return None

        if isinstance(self.cache, SeparateBodyBaseCache):
            body_file = self.cache.get_body(cache_url)
        else:
            body_file = None

        result = self.serializer.loads(request, cache_data, body_file)
        if result is None:
            logger.warning("Cache entry deserialization failed, entry ignored")
        return result

    def cached_request(self, request: PreparedRequest) -> HTTPResponse | Literal[False]:
        """
        Return a cached response if it exists in the cache, otherwise
        return False.
        """
        assert request.url is not None
        cache_url = self.cache_url(request.url)
        logger.debug('Looking up "%s" in the cache', cache_url)
        cc = self.parse_cache_control(request.headers)

        # Bail out if the request insists on fresh data
        if "no-cache" in cc:
            logger.debug('Request header has "no-cache", cache bypassed')
            return False

        if "max-age" in cc and cc["max-age"] == 0:
            logger.debug('Request header has "max_age" as 0, cache bypassed')
            return False

        # Check whether we can load the response from the cache:
        resp = self._load_from_cache(request)
        if not resp:
            return False

        # If we have a cached permanent redirect, return it immediately. We
        # don't need to test our response for other headers b/c it is
        # intrinsically "cacheable" as it is Permanent.
        #
        # See:
        #   https://tools.ietf.org/html/rfc7231#section-6.4.2
        #
        # Client can try to refresh the value by repeating the request
        # with cache busting headers as usual (ie no-cache).
        if int(resp.status) in PERMANENT_REDIRECT_STATUSES:
            msg = (
                "Returning cached permanent redirect response "
                "(ignoring date and etag information)"
            )
            logger.debug(msg)
            return resp

        headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers)
        if not headers or "date" not in headers:
            if "etag" not in headers:
                # Without date or etag, the cached response can never be used
                # and should be deleted.
                logger.debug("Purging cached response: no date or etag")
                self.cache.delete(cache_url)
            logger.debug("Ignoring cached response: no date")
            return False

        now = time.time()
        time_tuple = parsedate_tz(headers["date"])
        assert time_tuple is not None
        date = calendar.timegm(time_tuple[:6])
        current_age = max(0, now - date)
        logger.debug("Current age based on date: %i", current_age)

        # TODO: There is an assumption that the result will be a
        #       urllib3 response object. This may not be best since we
        #       could probably avoid instantiating or constructing the
        #       response until we know we need it.
        resp_cc = self.parse_cache_control(headers)

        # determine freshness
        freshness_lifetime = 0

        # Check the max-age pragma in the cache control header
        max_age = resp_cc.get("max-age")
        if max_age is not None:
            freshness_lifetime = max_age
            logger.debug("Freshness lifetime from max-age: %i", freshness_lifetime)

        # If there isn't a max-age, check for an expires header
        elif "expires" in headers:
            expires = parsedate_tz(headers["expires"])
            if expires is not None:
                expire_time = calendar.timegm(expires[:6]) - date
                freshness_lifetime = max(0, expire_time)
                logger.debug("Freshness lifetime from expires: %i", freshness_lifetime)

        # Determine if we are setting freshness limit in the
        # request. Note, this overrides what was in the response.
        max_age = cc.get("max-age")
        if max_age is not None:
            freshness_lifetime = max_age
            logger.debug(
                "Freshness lifetime from request max-age: %i", freshness_lifetime
            )

        min_fresh = cc.get("min-fresh")
        if min_fresh is not None:
            # adjust our current age by our min fresh
            current_age += min_fresh
            logger.debug("Adjusted current age from min-fresh: %i", current_age)

        # Return entry if it is fresh enough
        if freshness_lifetime > current_age:
            logger.debug('The response is "fresh", returning cached response')
            logger.debug("%i > %i", freshness_lifetime, current_age)
            return resp

        # we're not fresh. If we don't have an Etag, clear it out
        if "etag" not in headers:
            logger.debug('The cached response is "stale" with no etag, purging')
            self.cache.delete(cache_url)

        # return the original handler
        return False

    def conditional_headers(self, request: PreparedRequest) -> dict[str, str]:
        resp = self._load_from_cache(request)
        new_headers = {}

        if resp:
            headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers)

            if "etag" in headers:
                new_headers["If-None-Match"] = headers["ETag"]

            if "last-modified" in headers:
                new_headers["If-Modified-Since"] = headers["Last-Modified"]

        return new_headers

    def _cache_set(
        self,
        cache_url: str,
        request: PreparedRequest,
        response: HTTPResponse,
        body: bytes | None = None,
        expires_time: int | None = None,
    ) -> None:
        """
        Store the data in the cache.
        """
        if isinstance(self.cache, SeparateBodyBaseCache):
            # We pass in the body separately; just put a placeholder empty
            # string in the metadata.
            self.cache.set(
                cache_url,
                self.serializer.dumps(request, response, b""),
                expires=expires_time,
            )
            # body is None can happen when, for example, we're only updating
            # headers, as is the case in update_cached_response().
            if body is not None:
                self.cache.set_body(cache_url, body)
        else:
            self.cache.set(
                cache_url,
                self.serializer.dumps(request, response, body),
                expires=expires_time,
            )

    def cache_response(
        self,
        request: PreparedRequest,
        response: HTTPResponse,
        body: bytes | None = None,
        status_codes: Collection[int] | None = None,
    ) -> None:
        """
        Algorithm for caching requests.

        This assumes a requests Response object.
        """
        # From httplib2: Don't cache 206's since we aren't going to
        #                handle byte range requests
        cacheable_status_codes = status_codes or self.cacheable_status_codes
        if response.status not in cacheable_status_codes:
            logger.debug(
                "Status code %s not in %s", response.status, cacheable_status_codes
            )
            return

        response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
            response.headers
        )

        if "date" in response_headers:
            time_tuple = parsedate_tz(response_headers["date"])
            assert time_tuple is not None
            date = calendar.timegm(time_tuple[:6])
        else:
            date = 0

        # If we've been given a body, our response has a Content-Length, and
        # that Content-Length is valid, then we can check to see if the body
        # we've been given matches the expected size, and if it doesn't we'll
        # just skip trying to cache it.
        if (
            body is not None
            and "content-length" in response_headers
            and response_headers["content-length"].isdigit()
            and int(response_headers["content-length"]) != len(body)
        ):
            return

        cc_req = self.parse_cache_control(request.headers)
        cc = self.parse_cache_control(response_headers)

        assert request.url is not None
        cache_url = self.cache_url(request.url)
        logger.debug('Updating cache with response from "%s"', cache_url)

        # Delete it from the cache if we happen to have it stored there
        no_store = False
        if "no-store" in cc:
            no_store = True
            logger.debug('Response header has "no-store"')
        if "no-store" in cc_req:
            no_store = True
            logger.debug('Request header has "no-store"')
        if no_store and self.cache.get(cache_url):
            logger.debug('Purging existing cache entry to honor "no-store"')
            self.cache.delete(cache_url)
        if no_store:
            return

        # https://tools.ietf.org/html/rfc7234#section-4.1:
        # A Vary header field-value of "*" always fails to match.
        # Storing such a response leads to a deserialization warning
        # during cache lookup and is not allowed to ever be served,
        # so storing it can be avoided.
        if "*" in response_headers.get("vary", ""):
            logger.debug('Response header has "Vary: *"')
            return

        # If we've been given an etag, then keep the response
        if self.cache_etags and "etag" in response_headers:
            expires_time = 0
            if response_headers.get("expires"):
                expires = parsedate_tz(response_headers["expires"])
                if expires is not None:
                    expires_time = calendar.timegm(expires[:6]) - date

            expires_time = max(expires_time, 14 * 86400)

            logger.debug(f"etag object cached for {expires_time} seconds")
            logger.debug("Caching due to etag")
            self._cache_set(cache_url, request, response, body, expires_time)

        # Add to the cache any permanent redirects. We do this before looking
        # at the Date headers.
        elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
            logger.debug("Caching permanent redirect")
            self._cache_set(cache_url, request, response, b"")

        # Add to the cache if the response headers demand it. If there
        # is no date header then we can't do anything about expiring
        # the cache.
        elif "date" in response_headers:
            time_tuple = parsedate_tz(response_headers["date"])
            assert time_tuple is not None
            date = calendar.timegm(time_tuple[:6])
            # cache when there is a max-age > 0
            max_age = cc.get("max-age")
            if max_age is not None and max_age > 0:
                logger.debug("Caching b/c date exists and max-age > 0")
                expires_time = max_age
                self._cache_set(
                    cache_url,
                    request,
                    response,
                    body,
                    expires_time,
                )

            # If the request can expire, it means we should cache it
            # in the meantime.
            elif "expires" in response_headers:
                if response_headers["expires"]:
                    expires = parsedate_tz(response_headers["expires"])
                    if expires is not None:
                        expires_time = calendar.timegm(expires[:6]) - date
                    else:
                        expires_time = None

                    logger.debug(
                        "Caching b/c of expires header. expires in {} seconds".format(
                            expires_time
                        )
                    )
                    self._cache_set(
                        cache_url,
                        request,
                        response,
                        body,
                        expires_time,
                    )

    def update_cached_response(
        self, request: PreparedRequest, response: HTTPResponse
    ) -> HTTPResponse:
        """On a 304 we will get a new set of headers that we want to
        update our cached value with, assuming we have one.

        This should only ever be called when we've sent an ETag and
        gotten a 304 as the response.
        """
        assert request.url is not None
        cache_url = self.cache_url(request.url)
        cached_response = self._load_from_cache(request)

        if not cached_response:
            # we didn't have a cached response
            return response

        # Let's update our headers with the headers from the new request:
        # http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
        #
        # The server isn't supposed to send headers that would make
        # the cached body invalid. But... just in case, we'll be sure
        # to strip out ones we know that might be problematic due to
        # typical assumptions.
        excluded_headers = ["content-length"]

        cached_response.headers.update(
            {
                k: v
                for k, v in response.headers.items()
                if k.lower() not in excluded_headers
            }
        )

        # we want a 200 b/c we have content via the cache
        cached_response.status = 200

        # update our cache
        self._cache_set(cache_url, request, cached_response)

        return cached_response
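To make the directive parsing concrete, a small sketch of parse_cache_control on a typical header; per the code above, valueless directives map to None and max-age is coerced to int:

from pip._vendor.cachecontrol.controller import CacheController

cc = CacheController().parse_cache_control(
    {"cache-control": "public, max-age=3600, no-transform"}
)
print(cc)  # {'public': None, 'max-age': 3600, 'no-transform': None}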
@@ -0,0 +1,119 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

import mmap
from tempfile import NamedTemporaryFile
from typing import TYPE_CHECKING, Any, Callable

if TYPE_CHECKING:
    from http.client import HTTPResponse


class CallbackFileWrapper:
    """
    Small wrapper around a fp object which will tee everything read into a
    buffer, and when that file is closed it will execute a callback with the
    contents of that buffer.

    All attributes are proxied to the underlying file object.

    This class uses members with a double underscore (__) leading prefix so as
    not to accidentally shadow an attribute.

    The data is stored in a temporary file until it is all available. As long
    as the temporary files directory is disk-based (sometimes it's a
    memory-backed ``tmpfs`` on Linux), data will be unloaded to disk if memory
    pressure is high. For small files the disk usually won't be used at all,
    it'll all be in the filesystem memory cache, so there should be no
    performance impact.
    """

    def __init__(
        self, fp: HTTPResponse, callback: Callable[[bytes], None] | None
    ) -> None:
        self.__buf = NamedTemporaryFile("rb+", delete=True)
        self.__fp = fp
        self.__callback = callback

    def __getattr__(self, name: str) -> Any:
        # The vagaries of garbage collection mean that self.__fp is
        # not always set. Using __getattribute__ and the private
        # name[0] allows looking up the attribute value and raising an
        # AttributeError when it doesn't exist. This stops things from
        # infinitely recursing calls to getattr in the case where
        # self.__fp hasn't been set.
        #
        # [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
        fp = self.__getattribute__("_CallbackFileWrapper__fp")
        return getattr(fp, name)

    def __is_fp_closed(self) -> bool:
        try:
            return self.__fp.fp is None

        except AttributeError:
            pass

        try:
            closed: bool = self.__fp.closed
            return closed

        except AttributeError:
            pass

        # We just don't cache it then.
        # TODO: Add some logging here...
        return False

    def _close(self) -> None:
        if self.__callback:
            if self.__buf.tell() == 0:
                # Empty file:
                result = b""
            else:
                # Return the data without actually loading it into memory,
                # relying on Python's buffer API and mmap(). mmap() just gives
                # a view directly into the filesystem's memory cache, so it
                # doesn't result in duplicate memory use.
                self.__buf.seek(0, 0)
                result = memoryview(
                    mmap.mmap(self.__buf.fileno(), 0, access=mmap.ACCESS_READ)
                )
            self.__callback(result)

        # We assign this to None here, because otherwise we can get into
        # really tricky problems where the CPython interpreter dead locks
        # because the callback is holding a reference to something which
        # has a __del__ method. Setting this to None breaks the cycle
        # and allows the garbage collector to do its thing normally.
        self.__callback = None

        # Closing the temporary file releases memory and frees disk space.
        # Important when caching big files.
        self.__buf.close()

    def read(self, amt: int | None = None) -> bytes:
        data: bytes = self.__fp.read(amt)
        if data:
            # We may be dealing with b'', a sign that things are over:
            # it's passed e.g. after we've already closed self.__buf.
            self.__buf.write(data)
        if self.__is_fp_closed():
            self._close()

        return data

    def _safe_read(self, amt: int) -> bytes:
        data: bytes = self.__fp._safe_read(amt)  # type: ignore[attr-defined]
        if amt == 2 and data == b"\r\n":
            # urllib executes this read to toss the CRLF at the end
            # of the chunk.
            return data

        self.__buf.write(data)
        if self.__is_fp_closed():
            self._close()

        return data
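A self-contained sketch of the tee behavior. FakeRaw is a hypothetical stand-in for the http.client response object that the adapter really passes in; it exists only to show the callback firing once the stream is exhausted:

import io

from pip._vendor.cachecontrol.filewrapper import CallbackFileWrapper


class FakeRaw:
    def __init__(self, payload):
        self.fp = io.BytesIO(payload)  # __is_fp_closed() checks .fp is None

    def read(self, amt=None):
        data = self.fp.read(amt)
        if not data:
            self.fp = None             # signal exhaustion like http.client does
        return data


captured = []
wrapped = CallbackFileWrapper(FakeRaw(b"hello"), captured.append)
while wrapped.read(2):
    pass
print(bytes(captured[0]))  # b'hello' -- the callback fired exactly once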
@@ -0,0 +1,154 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

import calendar
import time
from datetime import datetime, timedelta, timezone
from email.utils import formatdate, parsedate, parsedate_tz
from typing import TYPE_CHECKING, Any, Mapping

if TYPE_CHECKING:
    from pip._vendor.urllib3 import HTTPResponse

TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"


def expire_after(delta: timedelta, date: datetime | None = None) -> datetime:
    date = date or datetime.now(timezone.utc)
    return date + delta


def datetime_to_header(dt: datetime) -> str:
    return formatdate(calendar.timegm(dt.timetuple()))


class BaseHeuristic:
    def warning(self, response: HTTPResponse) -> str | None:
        """
        Return a valid 1xx warning header value describing the cache
        adjustments.

        The response is provided to allow warnings like 113
        http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need
        to explicitly say the response is over 24 hours old.
        """
        return '110 - "Response is Stale"'

    def update_headers(self, response: HTTPResponse) -> dict[str, str]:
        """Update the response headers with any new headers.

        NOTE: This SHOULD always include some Warning header to
              signify that the response was cached by the client, not
              by way of the provided headers.
        """
        return {}

    def apply(self, response: HTTPResponse) -> HTTPResponse:
        updated_headers = self.update_headers(response)

        if updated_headers:
            response.headers.update(updated_headers)
            warning_header_value = self.warning(response)
            if warning_header_value is not None:
                response.headers.update({"Warning": warning_header_value})

        return response


class OneDayCache(BaseHeuristic):
    """
    Cache the response by providing an expires 1 day in the
    future.
    """

    def update_headers(self, response: HTTPResponse) -> dict[str, str]:
        headers = {}

        if "expires" not in response.headers:
            date = parsedate(response.headers["date"])
            expires = expire_after(timedelta(days=1), date=datetime(*date[:6], tzinfo=timezone.utc))  # type: ignore[index,misc]
            headers["expires"] = datetime_to_header(expires)
            headers["cache-control"] = "public"
        return headers


class ExpiresAfter(BaseHeuristic):
    """
    Cache **all** requests for a defined time period.
    """

    def __init__(self, **kw: Any) -> None:
        self.delta = timedelta(**kw)

    def update_headers(self, response: HTTPResponse) -> dict[str, str]:
        expires = expire_after(self.delta)
        return {"expires": datetime_to_header(expires), "cache-control": "public"}

    def warning(self, response: HTTPResponse) -> str | None:
        tmpl = "110 - Automatically cached for %s. Response might be stale"
        return tmpl % self.delta


class LastModified(BaseHeuristic):
    """
    If there is no Expires header already, fall back on Last-Modified
    using the heuristic from
    http://tools.ietf.org/html/rfc7234#section-4.2.2
    to calculate a reasonable value.

    Firefox also does something like this per
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ
    http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
    Unlike mozilla we limit this to 24-hr.
    """

    cacheable_by_default_statuses = {
        200,
        203,
        204,
        206,
        300,
        301,
        404,
        405,
        410,
        414,
        501,
    }

    def update_headers(self, resp: HTTPResponse) -> dict[str, str]:
        headers: Mapping[str, str] = resp.headers

        if "expires" in headers:
            return {}

        if "cache-control" in headers and headers["cache-control"] != "public":
            return {}

        if resp.status not in self.cacheable_by_default_statuses:
            return {}

        if "date" not in headers or "last-modified" not in headers:
            return {}

        time_tuple = parsedate_tz(headers["date"])
        assert time_tuple is not None
        date = calendar.timegm(time_tuple[:6])
        last_modified = parsedate(headers["last-modified"])
        if last_modified is None:
            return {}

        now = time.time()
        current_age = max(0, now - date)
        delta = date - calendar.timegm(last_modified)
        freshness_lifetime = max(0, min(delta / 10, 24 * 3600))
        if freshness_lifetime <= current_age:
            return {}

        expires = date + freshness_lifetime
        return {"expires": time.strftime(TIME_FMT, time.gmtime(expires))}

    def warning(self, resp: HTTPResponse) -> str | None:
        return None
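A usage sketch for the heuristics, combined with the CacheControl wrapper added later in this commit; ExpiresAfter forwards its keyword arguments straight to datetime.timedelta:

from pip._vendor import requests

from pip._vendor.cachecontrol.heuristics import ExpiresAfter
from pip._vendor.cachecontrol.wrapper import CacheControl

sess = CacheControl(requests.Session(), heuristic=ExpiresAfter(minutes=10))
# Every response now gets synthetic expires/cache-control headers, plus the
# "110 - Automatically cached..." Warning header produced by warning().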
@@ -0,0 +1,146 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

import io
from typing import IO, TYPE_CHECKING, Any, Mapping, cast

from pip._vendor import msgpack
from pip._vendor.requests.structures import CaseInsensitiveDict
from pip._vendor.urllib3 import HTTPResponse

if TYPE_CHECKING:
    from pip._vendor.requests import PreparedRequest


class Serializer:
    serde_version = "4"

    def dumps(
        self,
        request: PreparedRequest,
        response: HTTPResponse,
        body: bytes | None = None,
    ) -> bytes:
        response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
            response.headers
        )

        if body is None:
            # When a body isn't passed in, we'll read the response. We
            # also update the response with a new file handler to be
            # sure it acts as though it was never read.
            body = response.read(decode_content=False)
            response._fp = io.BytesIO(body)  # type: ignore[assignment]
            response.length_remaining = len(body)

        data = {
            "response": {
                "body": body,  # Empty bytestring if body is stored separately
                "headers": {str(k): str(v) for k, v in response.headers.items()},
                "status": response.status,
                "version": response.version,
                "reason": str(response.reason),
                "decode_content": response.decode_content,
            }
        }

        # Construct our vary headers
        data["vary"] = {}
        if "vary" in response_headers:
            varied_headers = response_headers["vary"].split(",")
            for header in varied_headers:
                header = str(header).strip()
                header_value = request.headers.get(header, None)
                if header_value is not None:
                    header_value = str(header_value)
                data["vary"][header] = header_value

        return b",".join([f"cc={self.serde_version}".encode(), self.serialize(data)])

    def serialize(self, data: dict[str, Any]) -> bytes:
        return cast(bytes, msgpack.dumps(data, use_bin_type=True))

    def loads(
        self,
        request: PreparedRequest,
        data: bytes,
        body_file: IO[bytes] | None = None,
    ) -> HTTPResponse | None:
        # Short circuit if we've been given an empty set of data
        if not data:
            return None

        # Previous versions of this library supported other serialization
        # formats, but these have all been removed.
        if not data.startswith(f"cc={self.serde_version},".encode()):
            return None

        data = data[5:]
        return self._loads_v4(request, data, body_file)

    def prepare_response(
        self,
        request: PreparedRequest,
        cached: Mapping[str, Any],
        body_file: IO[bytes] | None = None,
    ) -> HTTPResponse | None:
        """Verify our vary headers match and construct a real urllib3
        HTTPResponse object.
        """
        # Special case the '*' Vary value as it means we cannot actually
        # determine if the cached response is suitable for this request.
        # This case is also handled in the controller code when creating
        # a cache entry, but is left here for backwards compatibility.
        if "*" in cached.get("vary", {}):
            return None

        # Ensure that the Vary headers for the cached response match our
        # request
        for header, value in cached.get("vary", {}).items():
            if request.headers.get(header, None) != value:
                return None

        body_raw = cached["response"].pop("body")

        headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
            data=cached["response"]["headers"]
        )
        if headers.get("transfer-encoding", "") == "chunked":
            headers.pop("transfer-encoding")

        cached["response"]["headers"] = headers

        try:
            body: IO[bytes]
            if body_file is None:
                body = io.BytesIO(body_raw)
            else:
                body = body_file
        except TypeError:
            # This can happen if cachecontrol serialized to v1 format (pickle)
            # using Python 2. A Python 2 str(byte string) will be unpickled as
            # a Python 3 str (unicode string), which will cause the above to
            # fail with:
            #
            #     TypeError: 'str' does not support the buffer interface
            body = io.BytesIO(body_raw.encode("utf8"))

        # Discard any `strict` parameter serialized by older version of cachecontrol.
        cached["response"].pop("strict", None)

        return HTTPResponse(body=body, preload_content=False, **cached["response"])

    def _loads_v4(
        self,
        request: PreparedRequest,
        data: bytes,
        body_file: IO[bytes] | None = None,
    ) -> HTTPResponse | None:
        try:
            cached = msgpack.loads(data, raw=False)
        except ValueError:
            return None

        return self.prepare_response(request, cached, body_file)
@@ -0,0 +1,43 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

from typing import TYPE_CHECKING, Collection

from pip._vendor.cachecontrol.adapter import CacheControlAdapter
from pip._vendor.cachecontrol.cache import DictCache

if TYPE_CHECKING:
    from pip._vendor import requests

    from pip._vendor.cachecontrol.cache import BaseCache
    from pip._vendor.cachecontrol.controller import CacheController
    from pip._vendor.cachecontrol.heuristics import BaseHeuristic
    from pip._vendor.cachecontrol.serialize import Serializer


def CacheControl(
    sess: requests.Session,
    cache: BaseCache | None = None,
    cache_etags: bool = True,
    serializer: Serializer | None = None,
    heuristic: BaseHeuristic | None = None,
    controller_class: type[CacheController] | None = None,
    adapter_class: type[CacheControlAdapter] | None = None,
    cacheable_methods: Collection[str] | None = None,
) -> requests.Session:
    cache = DictCache() if cache is None else cache
    adapter_class = adapter_class or CacheControlAdapter
    adapter = adapter_class(
        cache,
        cache_etags=cache_etags,
        serializer=serializer,
        heuristic=heuristic,
        controller_class=controller_class,
        cacheable_methods=cacheable_methods,
    )
    sess.mount("http://", adapter)
    sess.mount("https://", adapter)

    return sess
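So the canonical entry point is a one-liner. A closing sketch (the URL is a placeholder; the second request is served from the default DictCache only if the response was cacheable):

from pip._vendor import requests

from pip._vendor.cachecontrol.wrapper import CacheControl

sess = CacheControl(requests.Session())
resp1 = sess.get("https://example.com")  # hits the network
resp2 = sess.get("https://example.com")  # may be served from cache
print(resp2.from_cache)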