From 1606044a40f6e4e13721a0f3766dbd9a28d67479 Mon Sep 17 00:00:00 2001
From: jaseg
Date: Fri, 16 Oct 2020 18:05:28 +0200
Subject: talk: Small fixes, add source list

---
 talk/credits.ods             | Bin 0 -> 22123 bytes
 talk/pics/original/scrape.py |  71 ++++++++++++++++++++++++++++++++++++
 talk/safety_reset.odp        | Bin 41356088 -> 41375799 bytes
 talk/safety_reset.pdf        | Bin 9287077 -> 9305257 bytes
 4 files changed, 71 insertions(+)
 create mode 100644 talk/credits.ods
 create mode 100644 talk/pics/original/scrape.py

diff --git a/talk/credits.ods b/talk/credits.ods
new file mode 100644
index 0000000..dba524d
Binary files /dev/null and b/talk/credits.ods differ
diff --git a/talk/pics/original/scrape.py b/talk/pics/original/scrape.py
new file mode 100644
index 0000000..2191d09
--- /dev/null
+++ b/talk/pics/original/scrape.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+"""Print a credit line for every Unsplash photo in the current directory.
+
+Files named ``<anything>-<11-character photo id>-unsplash.jpg`` are looked up
+on unsplash.com.  For each photo, ``<photographer>: <title or category>`` is
+written to stdout; the id of every photo that could not be resolved is
+written to stderr instead.
+"""
+
+import os
+import re
+import sys
+import requests
+from bs4 import BeautifulSoup
+import IPython
+from os import path
+
+if __name__ != '__main__':
+    raise ImportError('This is a command-line script and not supposed to be imported.')
+
+# Compiled once up front because each pattern is reused for every photo.
+FILENAME_RE = re.compile(r'.*-([0-9a-zA-Z-]{11})-unsplash\.jpg$')
+TITLED_RE = re.compile(r'(.*) photo – Free (.*)Image on Unsplash')
+UNTITLED_RE = re.compile(r'Free (.*)Image on Unsplash')
+PROFILE_ALT_RE = re.compile("Go to (.*)'s profile")
+
+# Seconds to wait for unsplash.com before giving up on a photo.
+REQUEST_TIMEOUT = 30
+
+filename_matches = [FILENAME_RE.match(fn) for fn in os.listdir()]
+pic_ids = [m.group(1) for m in filename_matches if m]
+
+for pic_id in pic_ids:
+    try:
+        res = requests.get(f'https://unsplash.com/photos/{pic_id}', timeout=REQUEST_TIMEOUT)
+        # An error page would otherwise be parsed as if it were a photo page.
+        res.raise_for_status()
+        soup = BeautifulSoup(res.text, features='lxml')
+
+        page_title = soup.find('title').text
+        match = TITLED_RE.match(page_title)
+        if match:
+            title, category = match.groups()
+        else:
+            match = UNTITLED_RE.match(page_title)
+            if match is None:
+                raise ValueError(f'unexpected page title: {page_title!r}')
+            # Untitled photo: clear the title so the category is printed
+            # instead of the raw page title.
+            title = ''
+            category, = match.groups()
+
+        # Profile links point at "/@<user>"; the image inside one carries the
+        # alt text "Go to <name>'s profile".  Take the first that matches.
+        name = None
+        for link in soup.find_all('a', href=True):
+            if not link['href'].startswith('/@'):
+                continue
+            img = link.find('img')
+            profile = PROFILE_ALT_RE.match(img.get('alt', '')) if img else None
+            if profile:
+                name = profile.group(1)
+                break
+        if name is None:
+            raise ValueError('photographer name not found')
+
+        print(f'{name}: {title or category.strip()}')
+    except Exception:
+        # Best effort: report the id and carry on with the remaining photos.
+        # Unlike a bare except, this lets Ctrl-C stop the script.
+        print(pic_id, file=sys.stderr)
diff --git a/talk/safety_reset.odp b/talk/safety_reset.odp
index bc25a86..deeb634 100644
Binary files a/talk/safety_reset.odp and b/talk/safety_reset.odp differ
diff --git a/talk/safety_reset.pdf b/talk/safety_reset.pdf
index bad1fb6..faf36ad 100644
Binary files a/talk/safety_reset.pdf and b/talk/safety_reset.pdf differ
-- 
cgit