Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: add http caching mechanism #309

Merged
merged 5 commits into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,28 @@ Default: `<!-- more -->`

----

### :material-recycle: `cache_dir`: folder where to store plugin's cached files { #cache_dir }

The plugin implements a caching mechanism, ensuring that a remote media file is only fetched once during its life-cycle on the remote HTTP server (using [CacheControl](https://pypi.org/project/CacheControl/) under the hood). It is normally not necessary to specify this setting, except when you want to change the path within your root directory where HTTP body and metadata files are cached.

If you want to change it, use:

``` yaml
plugins:
  - rss:
      cache_dir: my/custom/dir
```

It's strongly recommended to add the path to your `.gitignore` file in the root of your project:

``` title=".gitignore"
.cache
```

Default: `.cache/plugins/rss`.

----

### :material-tag-multiple: `categories`: item categories { #categories }

`categories`: list of page metadata values to use as [RSS item categories](https://www.w3schools.com/xml/rss_tag_category_item.asp).
Expand Down
4 changes: 4 additions & 0 deletions mkdocs_rss_plugin/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from mkdocs.config import config_options
from mkdocs.config.base import Config

# package
from mkdocs_rss_plugin.constants import DEFAULT_CACHE_FOLDER

# ############################################################################
# ########## Classes ###############
# ##################################
Expand Down Expand Up @@ -42,6 +45,7 @@ class RssPluginConfig(Config):
categories = config_options.Optional(
config_options.ListOfItems(config_options.Type(str))
)
cache_dir = config_options.Type(str, default=f"{DEFAULT_CACHE_FOLDER.resolve()}")
comments_path = config_options.Optional(config_options.Type(str))
date_from_meta = config_options.SubConfig(_DateFromMeta)
enabled = config_options.Type(bool, default=True)
Expand Down
1 change: 1 addition & 0 deletions mkdocs_rss_plugin/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# ########## Globals #############
# ################################

DEFAULT_CACHE_FOLDER = Path(".cache/plugins/rss")
DEFAULT_TEMPLATE_FOLDER = Path(__file__).parent / "templates"
DEFAULT_TEMPLATE_FILENAME = DEFAULT_TEMPLATE_FOLDER / "rss.xml.jinja2"
MKDOCS_LOGGER_NAME = "[RSS-plugin]"
Expand Down
10 changes: 6 additions & 4 deletions mkdocs_rss_plugin/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig:
self.config.enabled = False
return config

# cache dir
self.cache_dir = Path(self.config.cache_dir)
self.cache_dir.mkdir(parents=True, exist_ok=True)
logger.debug(f"Caching HTTP requests to: {self.cache_dir.resolve()}")

# integrations - check if theme is Material and if social cards are enabled
self.integration_material_social_cards = IntegrationMaterialSocialCards(
mkdocs_config=config,
Expand All @@ -100,6 +105,7 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig:

# instantiate plugin tooling
self.util = Util(
cache_dir=self.cache_dir,
use_git=self.config.use_git,
integration_material_social_cards=self.integration_material_social_cards,
)
Expand Down Expand Up @@ -169,10 +175,6 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig:
self.config.date_from_meta.default_time = datetime.strptime(
self.config.date_from_meta.default_time, "%H:%M"
)
print(
self.config.date_from_meta.default_time,
type(self.config.date_from_meta.default_time),
)
except (TypeError, ValueError) as err:
logger.warning(
"Config error: `date_from_meta.default_time` value "
Expand Down
18 changes: 15 additions & 3 deletions mkdocs_rss_plugin/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
# 3rd party
import markdown
import urllib3
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import SeparateBodyFileCache
from git import (
GitCommandError,
GitCommandNotFound,
Expand All @@ -34,7 +36,11 @@
from requests.exceptions import ConnectionError, HTTPError

# package
from mkdocs_rss_plugin.constants import MKDOCS_LOGGER_NAME, REMOTE_REQUEST_HEADERS
from mkdocs_rss_plugin.constants import (
DEFAULT_CACHE_FOLDER,
MKDOCS_LOGGER_NAME,
REMOTE_REQUEST_HEADERS,
)
from mkdocs_rss_plugin.git_manager.ci import CiHandler
from mkdocs_rss_plugin.integrations.theme_material_social_plugin import (
IntegrationMaterialSocialCards,
Expand Down Expand Up @@ -67,6 +73,7 @@ class Util:
def __init__(
self,
path: str = ".",
cache_dir: Path = DEFAULT_CACHE_FOLDER,
use_git: bool = True,
integration_material_social_cards: Optional[
IntegrationMaterialSocialCards
Expand Down Expand Up @@ -122,8 +129,13 @@ def __init__(
self.social_cards = integration_material_social_cards

# http/s session
self.req_session = Session()
self.req_session.headers.update(REMOTE_REQUEST_HEADERS)
session = Session()
session.headers.update(REMOTE_REQUEST_HEADERS)
self.req_session = CacheControl(
sess=session,
cache=SeparateBodyFileCache(directory=cache_dir),
cacheable_methods=("GET", "HEAD"),
)

def build_url(
self, base_url: str, path: str, args_dict: Optional[dict] = None
Expand Down
2 changes: 1 addition & 1 deletion requirements/base.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Common requirements
# -----------------------


cachecontrol[filecache] >=0.14,<1
GitPython>=3.1,<3.2
mkdocs>=1.5,<2
requests>=2.31,<3
Expand Down
45 changes: 45 additions & 0 deletions tests/dev/dev_cached_http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import http.client
import logging
from pathlib import Path

import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache

# Development helper: run a few GET/HEAD requests through a CacheControl
# session backed by a file cache, with verbose HTTP logging enabled, then list
# what ended up in the cache folder.

# resources requested by the script and folder used by the file cache
CACHE_FOLDER = ".web_cache"
SITE_URL = "https://geotribu.fr"
IMAGE_URL = (
    "https://cdn.geotribu.fr/img/articles-blog-rdp/capture-ecran/kevish_Air-Traffic.png"
)

# verbose output: http.client debug traces + DEBUG level on the root logger
# and on the urllib3 logger vendored under the requests namespace
http.client.HTTPConnection.debuglevel = 1
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
req_log = logging.getLogger("requests.packages.urllib3")
req_log.setLevel(logging.DEBUG)
req_log.propagate = True


# requests session wrapped by CacheControl, caching both HEAD and GET
sess = CacheControl(
    requests.Session(),
    cache=FileCache(CACHE_FOLDER),
    cacheable_methods=("HEAD", "GET"),
)


# get requests: first pass populates the cache, second pass is expected to
# be a cache hit
for _ in range(2):
    resp = sess.get(SITE_URL)
    resp_img = sess.get(IMAGE_URL)

# head requests: same idea, second pass is expected to be a cache hit
for _ in range(2):
    resp_img = sess.head(IMAGE_URL)

# show the content of the cache folder
print(list(Path(CACHE_FOLDER).iterdir()))
6 changes: 5 additions & 1 deletion tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
# 3rd party
from mkdocs.config.base import Config

# plugin target
from mkdocs_rss_plugin.config import RssPluginConfig

# plugin target
from mkdocs_rss_plugin.constants import DEFAULT_CACHE_FOLDER
from mkdocs_rss_plugin.plugin import GitRssPlugin

# test suite
Expand Down Expand Up @@ -62,6 +64,7 @@ def test_plugin_config_defaults(self):
"abstract_chars_count": 160,
"abstract_delimiter": "<!-- more -->",
"categories": None,
"cache_dir": f"{DEFAULT_CACHE_FOLDER.resolve()}",
"comments_path": None,
"date_from_meta": {
"as_creation": "git",
Expand Down Expand Up @@ -105,6 +108,7 @@ def test_plugin_config_image(self):
expected = {
"abstract_chars_count": 160,
"abstract_delimiter": "<!-- more -->",
"cache_dir": f"{DEFAULT_CACHE_FOLDER.resolve()}",
"categories": None,
"comments_path": None,
"date_from_meta": {
Expand Down