-
-
Notifications
You must be signed in to change notification settings - Fork 114
Open
Description
from bs4 import BeautifulSoup
import requests
import re
import json
# Download the public GitHub project view. The board's items are read from
# the JSON embedded in the page's <script id="memex-items-data"> tag.
r = requests.get(
    'https://github.com/users/huseinzol05/projects/1/views/1',
    timeout=30,  # never hang forever on a stalled connection
)
# Stop here on an HTTP error rather than failing later while parsing an
# error page that does not contain the expected <script> tag.
r.raise_for_status()
soup = BeautifulSoup(r.content, "lxml")
items_tag = soup.find('script', {'id': 'memex-items-data'})
if items_tag is None:
    # find() returns None when the tag is absent; report that explicitly
    # instead of an AttributeError on `.contents`.
    raise RuntimeError(
        "script tag 'memex-items-data' not found in the project page"
    )
data = json.loads(items_tag.contents[0])
# Quick sanity check of how many items were loaded (the value is only
# displayed in an interactive session; as a script this is a no-op).
len(data)
# Collect the raw title of every project item, then keep only the titles that
# contain both an opening and a closing parenthesis.
# NOTE(review): assumes the first entry of 'memexProjectColumnValues' is the
# title column -- confirm against the page's JSON.
titles = (
    item['memexProjectColumnValues'][0]['value']['title']['raw']
    for item in data
)
parsed = [title for title in titles if '(' in title and ')' in title]
# Decimal (SI) multipliers converting a size in the given unit to bytes.
# 'KB' is 1e3 bytes (it was mistakenly 1e4 before, inflating every KB entry
# tenfold).
units = {
    'KB': 1e3,
    'MB': 1e6,
    'GB': 1e9,
    'TB': 1e12,
}
# A parenthesised size such as "(1.5 GB)": a decimal number followed by one
# of the known units. Parentheticals that are not sizes (e.g. "(draft)") do
# not match and are skipped instead of crashing the whole run.
size_pattern = re.compile(
    r'\(\s*(\d+(?:\.\d+)?)\s*(' + '|'.join(units) + r')\s*\)'
)
# With two groups, findall() yields (number, unit) tuples; a title may carry
# more than one size, and each one is counted.
sizes = [
    float(number) * units[unit]
    for title in parsed
    for number, unit in size_pattern.findall(title)
]
# Total size in gigabytes (displayed only in an interactive session).
sum(sizes) / 1e9
Metadata
Assignees
Labels
No labels