#!/usr/bin/env python3
"""Print a "photographer: title" credit line for Unsplash photos in the current directory.

Photos are recognised by the file-name pattern ``<anything>-<11-char id>-unsplash.jpg``.
For every id found, the photo page on unsplash.com is downloaded and scraped for the
photo title (or, when the page title carries none, its category) and the
photographer's name. Credit lines are written to stdout; the id of every photo
that could not be processed is written to stderr, one per line.
"""
import os
import re
import sys
from os import path

import IPython
import requests
from bs4 import BeautifulSoup

if __name__ != '__main__':
    raise ImportError('This is a command-line script and not supposed to be imported.')

# Compiled once up front instead of being re-supplied for every photo.
_FILENAME_RE = re.compile(r'.*-([0-9a-zA-Z-]{11})-unsplash\.jpg$')
_TITLED_PAGE_RE = re.compile(r'(.*) photo – Free (.*)Image on Unsplash')
_UNTITLED_PAGE_RE = re.compile(r'Free (.*)Image on Unsplash')
_PROFILE_ALT_RE = re.compile("Go to (.*)'s profile")

# Seconds to wait for unsplash.com; without a timeout requests may block forever.
_REQUEST_TIMEOUT = 30


def _find_photo_ids(filenames):
    """Return the photo ids embedded in those *filenames* that match the download pattern."""
    matches = (_FILENAME_RE.match(filename) for filename in filenames)
    return [match.group(1) for match in matches if match]


def _split_page_title(page_title):
    """Split a photo page's ``<title>`` text into ``(title, category)``.

    ``title`` is the empty string when the page title only names a category.

    Raises:
        ValueError: if *page_title* matches neither known layout.
    """
    match = _TITLED_PAGE_RE.match(page_title)
    if match:
        return match.groups()
    match = _UNTITLED_PAGE_RE.match(page_title)
    if match is None:
        raise ValueError(f'unrecognised page title: {page_title!r}')
    # No photo title on this page: return an empty one so the caller falls
    # back to the category instead of echoing the raw page title.
    return '', match.group(1)


def _photographer_name(soup):
    """Return the name taken from the first profile link (href ``/@...``) that wraps an image.

    Raises:
        ValueError: if there is no such link, or its image's alt text is not
            of the form ``Go to <name>'s profile``.
    """
    for anchor in soup.find_all('a'):
        # Anchors without an href are skipped rather than aborting the photo.
        if not anchor.get('href', '').startswith('/@'):
            continue
        image = anchor.find('img')
        if image is None:
            continue
        alt_text = image.get('alt', '')
        match = _PROFILE_ALT_RE.match(alt_text)
        if match is None:
            raise ValueError(f'unexpected profile image alt text: {alt_text!r}')
        return match.group(1)
    raise ValueError('no profile link containing an image was found')


def _fetch_credit(photo_id):
    """Download the page for *photo_id* and return its ``"<name>: <title or category>"`` line.

    Raises:
        requests.RequestException: on network failure, timeout or a non-success HTTP status.
        ValueError: if the page does not have the expected structure.
    """
    response = requests.get(
        f'https://unsplash.com/photos/{photo_id}', timeout=_REQUEST_TIMEOUT
    )
    response.raise_for_status()
    soup = BeautifulSoup(response.text, features='lxml')

    title_tag = soup.find('title')
    if title_tag is None:
        raise ValueError('page has no <title> element')
    title, category = _split_page_title(title_tag.text)
    name = _photographer_name(soup)
    return f'{name}: {title if title else category.strip()}'


def main():
    """Print one credit line per Unsplash photo found in the current directory."""
    for photo_id in _find_photo_ids(os.listdir()):
        try:
            credit = _fetch_credit(photo_id)
        except Exception:
            # Best effort per photo: report the id and carry on with the rest.
            # Unlike a bare ``except:``, this lets Ctrl-C and SystemExit through.
            print(photo_id, file=sys.stderr)
        else:
            print(credit)


# Importing is rejected by the guard above, so this only runs as a script.
main()