Use HTTP caching (etag / if-none-match) in RSS downloads. - snac2 - Fork of https://codeberg.org/grunfink/snac2

commit e030fe6c5054c0a9b76a55adc80bb81d7a73fcc3
parent b3067987354ca8512979eef87d3de89adb5f741d
Author: grunfink <grunfink@comam.es>
Date:   Thu, 29 May 2025 18:03:23 +0200

Use HTTP caching (etag / if-none-match) in RSS downloads.

Diffstat:
M Makefile  | 2 +-
M Makefile.NetBSD  | 2 +-
M rss.c  | 40 ++++++++++++++++++++++++++++++++++++++--

3 files changed, 40 insertions(+), 4 deletions(-)
diff --git a/Makefile b/Makefile
@@ -68,7 +68,7 @@ mastoapi.o: mastoapi.c xs.h xs_hex.h xs_openssl.h xs_json.h xs_io.h \
  xs_time.h xs_glob.h xs_set.h xs_random.h xs_url.h xs_mime.h xs_match.h \
  xs_unicode.h snac.h http_codes.h
 rss.o: rss.c xs.h xs_html.h xs_regex.h xs_time.h xs_match.h xs_curl.h \
- snac.h http_codes.h
+ xs_openssl.h xs_json.h snac.h http_codes.h
 sandbox.o: sandbox.c xs.h snac.h http_codes.h
 snac.o: snac.c xs.h xs_hex.h xs_io.h xs_unicode_tbl.h xs_unicode.h \
  xs_json.h xs_curl.h xs_openssl.h xs_socket.h xs_unix_socket.h xs_url.h \
diff --git a/Makefile.NetBSD b/Makefile.NetBSD
@@ -57,7 +57,7 @@ mastoapi.o: mastoapi.c xs.h xs_hex.h xs_openssl.h xs_json.h xs_io.h \
  xs_time.h xs_glob.h xs_set.h xs_random.h xs_url.h xs_mime.h xs_match.h \
  xs_unicode.h snac.h http_codes.h
 rss.o: rss.c xs.h xs_html.h xs_regex.h xs_time.h xs_match.h xs_curl.h \
- snac.h http_codes.h
+ xs_openssl.h xs_json.h snac.h http_codes.h
 sandbox.o: sandbox.c xs.h snac.h http_codes.h
 snac.o: snac.c xs.h xs_hex.h xs_io.h xs_unicode_tbl.h xs_unicode.h \
  xs_json.h xs_curl.h xs_openssl.h xs_socket.h xs_unix_socket.h xs_url.h \
diff --git a/rss.c b/rss.c
@@ -7,6 +7,8 @@
 #include "xs_time.h"
 #include "xs_match.h"
 #include "xs_curl.h"
+#include "xs_openssl.h"
+#include "xs_json.h"
 
 #include "snac.h"
 
@@ -117,12 +119,37 @@ void rss_to_timeline(snac *user, const char *url)
     hdrs = xs_dict_set(hdrs, "accept",     "application/rss+xml");
     hdrs = xs_dict_set(hdrs, "user-agent", USER_AGENT);
 
+    /* get the RSS metadata */
+    xs *md5 = xs_md5_hex(url, strlen(url));
+    xs *rss_md_fn = xs_fmt("%s/rss", user->basedir);
+    mkdirx(rss_md_fn);
+    rss_md_fn = xs_str_cat(rss_md_fn, "/", md5, ".json");
+
+    xs *rss_md = NULL;
+    const char *etag = NULL;
+
+    FILE *f;
+    if ((f = fopen(rss_md_fn, "r")) != NULL) {
+        rss_md = xs_json_load(f);
+        fclose(f);
+
+        etag = xs_dict_get(rss_md, "etag");
+
+        if (xs_is_string(etag))
+            hdrs = xs_dict_set(hdrs, "if-none-match", etag);
+    }
+
+    if (rss_md == NULL)
+        rss_md = xs_dict_new();
+
     xs *payload = NULL;
     int status;
     int p_size;
 
     xs *rsp = xs_http_request("GET", url, hdrs, NULL, 0, &status, &payload, &p_size, 0);
 
+    snac_log(user, xs_fmt("parsing RSS %s %d", url, status));
+
     if (!valid_status(status) || !xs_is_string(payload))
         return;
 
@@ -131,8 +158,6 @@ void rss_to_timeline(snac *user, const char *url)
     if (!xs_is_string(ctype) || xs_str_in(ctype, "application/rss+xml") == -1)
         return;
 
-    snac_log(user, xs_fmt("parsing RSS %s", url));
-
     /* yes, parsing is done with regexes (now I have two problems blah blah blah) */
     xs *links = xs_regex_select(payload, "<link>[^<]+</link>");
     const char *link;
@@ -208,6 +233,17 @@ void rss_to_timeline(snac *user, const char *url)
             timeline_add(user, id, obj);
         }
     }
+
+    /* update the RSS metadata */
+    etag = xs_dict_get(rsp, "etag");
+
+    if (xs_is_string(etag)) {
+        rss_md = xs_dict_set(rss_md, "etag", etag);
+        if ((f = fopen(rss_md_fn, "w")) != NULL) {
+            xs_json_dump(rss_md, 4, f);
+            fclose(f);
+        }
+    }
 }

	snac2 Fork of https://codeberg.org/grunfink/snac2
	git clone https://git.inz.fi/snac2
	Log | Files | Refs | README | LICENSE

M Makefile        | 2 +-
M Makefile.NetBSD | 2 +-
M rss.c           | 40 ++++++++++++++++++++++++++++++++++++++--