blob: 2191d09f80eff6ded97dfccee37b9e28e9b53d5c (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
#!/usr/bin/env python3
import os
import re
import sys
import requests
from bs4 import BeautifulSoup
import IPython
from os import path
# Running this file immediately scans the working directory and issues
# network requests at module level, so it must only be launched directly.
# Refuse any attempt to import it as a module.
if __name__ != '__main__':
    raise ImportError(
        'This is a command-line script and not supposed to be imported.'
    )
# Collect the photo ids embedded in the file names of the current working
# directory.  A file qualifies when its name ends in
# "-<11 characters of [0-9a-zA-Z-]>-unsplash.jpg"; those 11 characters are
# the id.  Names that do not fit the pattern are ignored.
pic_ids = [
    hit.group(1)
    for hit in map(
        re.compile(r'.*-([0-9a-zA-Z-]{11})-unsplash\.jpg$').match,
        os.listdir(),
    )
    if hit
]
# Seconds to wait for Unsplash before giving up on a photo.  Without an
# explicit timeout ``requests.get`` may block forever on a stalled connection.
_REQUEST_TIMEOUT = 30


def _describe_photo(photo_id):
    """Return ``'<author>: <label>'`` for the Unsplash photo *photo_id*.

    The photo page is downloaded and scraped.  The label is the description
    found in the page ``<title>``; when the title carries no description
    (it only reads "Free <category> Image on Unsplash") the category is used
    instead.  The author is taken from the ``alt`` text of the first image
    that sits inside a link whose ``href`` starts with ``/@``.

    Raises:
        requests.RequestException: the page could not be downloaded.
        ValueError: the page does not have the expected title or no usable
            profile image was found.
    """
    res = requests.get(
        f'https://unsplash.com/photos/{photo_id}',
        timeout=_REQUEST_TIMEOUT,
    )
    soup = BeautifulSoup(res.text, features='lxml')

    title_tag = soup.find('title')
    if title_tag is None:
        raise ValueError('page has no <title> element')
    page_title = title_tag.text

    match = re.match(r'(.*) photo – Free (.*)Image on Unsplash', page_title)
    if match:
        title, category = match.groups()
    else:
        match = re.match(r'Free (.*)Image on Unsplash', page_title)
        if match is None:
            raise ValueError(f'unexpected page title: {page_title!r}')
        # No description in the title: clear it so the category fallback
        # below is actually used instead of echoing the raw page title.
        title = ''
        category, = match.groups()

    # Anchors without an href are skipped rather than aborting the photo.
    profile_links = (
        a for a in soup.find_all('a') if a.get('href', '').startswith('/@')
    )
    avatars = (a.find('img') for a in profile_links)
    alts = [
        img['alt']
        for img in avatars
        if img is not None and img.has_attr('alt')
    ]
    if not alts:
        raise ValueError('no profile image with alt text found')

    author = re.match("Go to (.*)'s profile", alts[0])
    if author is None:
        raise ValueError(f'unexpected profile image alt text: {alts[0]!r}')
    name = author.group(1)

    return f'{name}: {title if title else category.strip()}'


# Print one "<author>: <label>" line per photo on stdout.  The run is
# best-effort: a photo that cannot be fetched or parsed has its id written
# to stderr and the loop moves on to the next one.
for photo_id in pic_ids:
    try:
        line = _describe_photo(photo_id)
    except Exception:
        # Exception rather than a bare except, so Ctrl-C (KeyboardInterrupt)
        # and SystemExit still stop the script.
        print(photo_id, file=sys.stderr)
    else:
        print(line)
|