-
-
Save Xe/8809f085b890b62df3d74f864cfc85fd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
eval "$(lorri direnv)" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env nix-shell | |
#! nix-shell -p python39 --run python | |
import sqlite3 | |
import urllib.request | |
con = sqlite3.connect("data.db") | |
def get_feed(feed_url): | |
req = urllib.request.Request(feed_url, headers={"User-Agent": "Xe/feedfetch"}) | |
with urllib.request.urlopen(req) as response: | |
cur = con.cursor() | |
body = response.read() | |
cur.execute(""" | |
INSERT INTO jsonfeed_raw | |
(feed_url, raw) | |
VALUES | |
(?, json(?)) | |
ON CONFLICT DO | |
UPDATE SET raw = json(?) | |
""", (feed_url, body, body)) | |
con.commit() | |
print("got feed %s" % (feed_url)) | |
get_feed("https://christine.website/blog.json") | |
con.close() | |
con = sqlite3.connect("data.db") | |
cur = con.cursor() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE TABLE IF NOT EXISTS jsonfeed_raw | |
( feed_url TEXT PRIMARY KEY | |
, scrape_date TEXT NOT NULL DEFAULT (DATE('now')) | |
, raw TEXT NOT NULL | |
); | |
CREATE TRIGGER IF NOT EXISTS jsonfeed_raw_ins | |
AFTER INSERT ON jsonfeed_raw | |
BEGIN | |
INSERT INTO jsonfeed_metadata | |
( feed_url | |
, title | |
, description | |
, home_page_url | |
, author_name | |
) | |
VALUES ( NEW.feed_url | |
, json_extract(NEW.raw, '$.title') | |
, json_extract(NEW.raw, '$.description') | |
, json_extract(NEW.raw, '$.home_page_url') | |
, json_extract(NEW.raw, '$.author.name') | |
) | |
ON CONFLICT DO | |
UPDATE SET | |
title = json_extract(NEW.raw, '$.title') | |
, description = json_extract(NEW.raw, '$.description') | |
, home_page_url = json_extract(NEW.raw, '$.home_page_url') | |
, author_name = json_extract(NEW.raw, '$.author.name') | |
; | |
INSERT INTO jsonfeed_posts | |
( url | |
, feed_url | |
, title | |
, date_published | |
, content_html | |
) | |
SELECT | |
json_extract(json_each.value, '$.url') AS url | |
, NEW.feed_url AS feed_url | |
, json_extract(json_each.value, '$.title') AS title | |
, json_extract(json_each.value, '$.date_published') AS date_published | |
, json_extract(json_each.value, '$.content_html') AS content_html | |
FROM json_each(NEW.raw, '$.items') | |
WHERE TRUE | |
ON CONFLICT DO | |
UPDATE SET title = excluded.title | |
, date_published = excluded.date_published | |
, content_html = excluded.content_html | |
; | |
END; | |
CREATE TRIGGER IF NOT EXISTS jsonfeed_raw_upd | |
AFTER UPDATE ON jsonfeed_raw | |
BEGIN | |
INSERT INTO jsonfeed_metadata | |
( feed_url | |
, title | |
, description | |
, home_page_url | |
, author_name | |
) | |
VALUES ( NEW.feed_url | |
, json_extract(NEW.raw, '$.title') | |
, json_extract(NEW.raw, '$.description') | |
, json_extract(NEW.raw, '$.home_page_url') | |
, json_extract(NEW.raw, '$.author.name') | |
) | |
ON CONFLICT DO | |
UPDATE SET | |
title = json_extract(NEW.raw, '$.title') | |
, description = json_extract(NEW.raw, '$.description') | |
, home_page_url = json_extract(NEW.raw, '$.home_page_url') | |
, author_name = json_extract(NEW.raw, '$.author.name') | |
; | |
INSERT INTO jsonfeed_posts | |
( url | |
, feed_url | |
, title | |
, date_published | |
, content_html | |
) | |
SELECT | |
json_extract(json_each.value, '$.url') AS url | |
, NEW.feed_url AS feed_url | |
, json_extract(json_each.value, '$.title') AS title | |
, json_extract(json_each.value, '$.date_published') AS date_published | |
, json_extract(json_each.value, '$.content_html') AS content_html | |
FROM json_each(NEW.raw, '$.items') | |
WHERE TRUE | |
ON CONFLICT DO | |
UPDATE SET title = excluded.title | |
, date_published = excluded.date_published | |
, content_html = excluded.content_html | |
; | |
END; | |
CREATE TABLE IF NOT EXISTS jsonfeed_metadata | |
( feed_url TEXT PRIMARY KEY | |
, title TEXT NOT NULL | |
, description TEXT | |
, home_page_url TEXT | |
, author_name TEXT | |
, updated_at TEXT NOT NULL DEFAULT (DATE('now')) | |
); | |
CREATE TABLE IF NOT EXISTS jsonfeed_posts | |
( url TEXT PRIMARY KEY | |
, feed_url TEXT NOT NULL | |
, title TEXT NOT NULL | |
, date_published TEXT NOT NULL | |
, content_html TEXT | |
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ pkgs ? import <nixpkgs> {} }: | |
pkgs.mkShell { | |
buildInputs = with pkgs; [ | |
sqliteInteractive python39 | |
# keep this line if you use bash | |
pkgs.bashInteractive | |
]; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment