commit e030fe6c5054c0a9b76a55adc80bb81d7a73fcc3
parent b3067987354ca8512979eef87d3de89adb5f741d
Author: grunfink <grunfink@comam.es>
Date: Thu, 29 May 2025 18:03:23 +0200
Use HTTP caching (etag / if-none-match) in RSS downloads.
Diffstat:
3 files changed, 40 insertions(+), 4 deletions(-)
diff --git a/Makefile b/Makefile
@@ -68,7 +68,7 @@ mastoapi.o: mastoapi.c xs.h xs_hex.h xs_openssl.h xs_json.h xs_io.h \
xs_time.h xs_glob.h xs_set.h xs_random.h xs_url.h xs_mime.h xs_match.h \
xs_unicode.h snac.h http_codes.h
rss.o: rss.c xs.h xs_html.h xs_regex.h xs_time.h xs_match.h xs_curl.h \
- snac.h http_codes.h
+ xs_openssl.h xs_json.h snac.h http_codes.h
sandbox.o: sandbox.c xs.h snac.h http_codes.h
snac.o: snac.c xs.h xs_hex.h xs_io.h xs_unicode_tbl.h xs_unicode.h \
xs_json.h xs_curl.h xs_openssl.h xs_socket.h xs_unix_socket.h xs_url.h \
diff --git a/Makefile.NetBSD b/Makefile.NetBSD
@@ -57,7 +57,7 @@ mastoapi.o: mastoapi.c xs.h xs_hex.h xs_openssl.h xs_json.h xs_io.h \
xs_time.h xs_glob.h xs_set.h xs_random.h xs_url.h xs_mime.h xs_match.h \
xs_unicode.h snac.h http_codes.h
rss.o: rss.c xs.h xs_html.h xs_regex.h xs_time.h xs_match.h xs_curl.h \
- snac.h http_codes.h
+ xs_openssl.h xs_json.h snac.h http_codes.h
sandbox.o: sandbox.c xs.h snac.h http_codes.h
snac.o: snac.c xs.h xs_hex.h xs_io.h xs_unicode_tbl.h xs_unicode.h \
xs_json.h xs_curl.h xs_openssl.h xs_socket.h xs_unix_socket.h xs_url.h \
diff --git a/rss.c b/rss.c
@@ -7,6 +7,8 @@
#include "xs_time.h"
#include "xs_match.h"
#include "xs_curl.h"
+#include "xs_openssl.h"
+#include "xs_json.h"
#include "snac.h"
@@ -117,12 +119,37 @@ void rss_to_timeline(snac *user, const char *url)
hdrs = xs_dict_set(hdrs, "accept", "application/rss+xml");
hdrs = xs_dict_set(hdrs, "user-agent", USER_AGENT);
+ /* get the RSS metadata */
+ xs *md5 = xs_md5_hex(url, strlen(url));
+ xs *rss_md_fn = xs_fmt("%s/rss", user->basedir);
+ mkdirx(rss_md_fn);
+ rss_md_fn = xs_str_cat(rss_md_fn, "/", md5, ".json");
+
+ xs *rss_md = NULL;
+ const char *etag = NULL;
+
+ FILE *f;
+ if ((f = fopen(rss_md_fn, "r")) != NULL) {
+ rss_md = xs_json_load(f);
+ fclose(f);
+
+ etag = xs_dict_get(rss_md, "etag");
+
+ if (xs_is_string(etag))
+ hdrs = xs_dict_set(hdrs, "if-none-match", etag);
+ }
+
+ if (rss_md == NULL)
+ rss_md = xs_dict_new();
+
xs *payload = NULL;
int status;
int p_size;
xs *rsp = xs_http_request("GET", url, hdrs, NULL, 0, &status, &payload, &p_size, 0);
+ snac_log(user, xs_fmt("parsing RSS %s %d", url, status));
+
if (!valid_status(status) || !xs_is_string(payload))
return;
@@ -131,8 +158,6 @@ void rss_to_timeline(snac *user, const char *url)
if (!xs_is_string(ctype) || xs_str_in(ctype, "application/rss+xml") == -1)
return;
- snac_log(user, xs_fmt("parsing RSS %s", url));
-
/* yes, parsing is done with regexes (now I have two problems blah blah blah) */
xs *links = xs_regex_select(payload, "<link>[^<]+</link>");
const char *link;
@@ -208,6 +233,17 @@ void rss_to_timeline(snac *user, const char *url)
timeline_add(user, id, obj);
}
}
+
+ /* update the RSS metadata */
+ etag = xs_dict_get(rsp, "etag");
+
+ if (xs_is_string(etag)) {
+ rss_md = xs_dict_set(rss_md, "etag", etag);
+ if ((f = fopen(rss_md_fn, "w")) != NULL) {
+ xs_json_dump(rss_md, 4, f);
+ fclose(f);
+ }
+ }
}