snac2

Fork of https://codeberg.org/grunfink/snac2
git clone https://git.inz.fi/snac2
Log | Files | Refs | README | LICENSE

commit e030fe6c5054c0a9b76a55adc80bb81d7a73fcc3
parent b3067987354ca8512979eef87d3de89adb5f741d
Author: grunfink <grunfink@comam.es>
Date:   Thu, 29 May 2025 18:03:23 +0200

Use HTTP caching (etag / if-none-match) in RSS downloads.

Diffstat:
M Makefile | 2 +-
M Makefile.NetBSD | 2 +-
M rss.c | 40 ++++++++++++++++++++++++++++++++++++++--
3 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile @@ -68,7 +68,7 @@ mastoapi.o: mastoapi.c xs.h xs_hex.h xs_openssl.h xs_json.h xs_io.h \ xs_time.h xs_glob.h xs_set.h xs_random.h xs_url.h xs_mime.h xs_match.h \ xs_unicode.h snac.h http_codes.h rss.o: rss.c xs.h xs_html.h xs_regex.h xs_time.h xs_match.h xs_curl.h \ - snac.h http_codes.h + xs_openssl.h xs_json.h snac.h http_codes.h sandbox.o: sandbox.c xs.h snac.h http_codes.h snac.o: snac.c xs.h xs_hex.h xs_io.h xs_unicode_tbl.h xs_unicode.h \ xs_json.h xs_curl.h xs_openssl.h xs_socket.h xs_unix_socket.h xs_url.h \ diff --git a/Makefile.NetBSD b/Makefile.NetBSD @@ -57,7 +57,7 @@ mastoapi.o: mastoapi.c xs.h xs_hex.h xs_openssl.h xs_json.h xs_io.h \ xs_time.h xs_glob.h xs_set.h xs_random.h xs_url.h xs_mime.h xs_match.h \ xs_unicode.h snac.h http_codes.h rss.o: rss.c xs.h xs_html.h xs_regex.h xs_time.h xs_match.h xs_curl.h \ - snac.h http_codes.h + xs_openssl.h xs_json.h snac.h http_codes.h sandbox.o: sandbox.c xs.h snac.h http_codes.h snac.o: snac.c xs.h xs_hex.h xs_io.h xs_unicode_tbl.h xs_unicode.h \ xs_json.h xs_curl.h xs_openssl.h xs_socket.h xs_unix_socket.h xs_url.h \ diff --git a/rss.c b/rss.c @@ -7,6 +7,8 @@ #include "xs_time.h" #include "xs_match.h" #include "xs_curl.h" +#include "xs_openssl.h" +#include "xs_json.h" #include "snac.h" @@ -117,12 +119,37 @@ void rss_to_timeline(snac *user, const char *url) hdrs = xs_dict_set(hdrs, "accept", "application/rss+xml"); hdrs = xs_dict_set(hdrs, "user-agent", USER_AGENT); + /* get the RSS metadata */ + xs *md5 = xs_md5_hex(url, strlen(url)); + xs *rss_md_fn = xs_fmt("%s/rss", user->basedir); + mkdirx(rss_md_fn); + rss_md_fn = xs_str_cat(rss_md_fn, "/", md5, ".json"); + + xs *rss_md = NULL; + const char *etag = NULL; + + FILE *f; + if ((f = fopen(rss_md_fn, "r")) != NULL) { + rss_md = xs_json_load(f); + fclose(f); + + etag = xs_dict_get(rss_md, "etag"); + + if (xs_is_string(etag)) + hdrs = xs_dict_set(hdrs, "if-none-match", etag); + } + + if (rss_md == NULL) + rss_md = xs_dict_new(); + 
xs *payload = NULL; int status; int p_size; xs *rsp = xs_http_request("GET", url, hdrs, NULL, 0, &status, &payload, &p_size, 0); + snac_log(user, xs_fmt("parsing RSS %s %d", url, status)); + if (!valid_status(status) || !xs_is_string(payload)) return; @@ -131,8 +158,6 @@ void rss_to_timeline(snac *user, const char *url) if (!xs_is_string(ctype) || xs_str_in(ctype, "application/rss+xml") == -1) return; - snac_log(user, xs_fmt("parsing RSS %s", url)); - /* yes, parsing is done with regexes (now I have two problems blah blah blah) */ xs *links = xs_regex_select(payload, "<link>[^<]+</link>"); const char *link; @@ -208,6 +233,17 @@ void rss_to_timeline(snac *user, const char *url) timeline_add(user, id, obj); } } + + /* update the RSS metadata */ + etag = xs_dict_get(rsp, "etag"); + + if (xs_is_string(etag)) { + rss_md = xs_dict_set(rss_md, "etag", etag); + if ((f = fopen(rss_md_fn, "w")) != NULL) { + xs_json_dump(rss_md, 4, f); + fclose(f); + } + } }