A few scripts to gather and analyze bits of us from Instagram and YouTube takeouts, with scraping, metadata integration, and analytics.
- Download saved/liked posts with metadata integration
- Export chats to structured JSONL format
- Activity analytics with interactive plots and statistics
- EXIF metadata preservation for downloaded media
- Download playlists, including liked playlist (metadata, audio or video using yt-dlp)
- Highlights dashboard with viewing patterns and trends (similar to A3M4/YouTube-Report)
- Wordclouds for searches and watched content
- Time series & seasonal plots
- Time-series plots of smoked cigarettes per day (this is what I use this App for 😬)
- Clone the repository:
git clone https://github.com/giopaglia/bitsofus.git
cd bitsofus
- Install dependencies:
pip install -r requirements.txt
- Set up environment variables:
cp .env.example .env
# Edit .env with your takeout data paths
python instagram-list.py saved
python instagram-list.py liked
python instagram-messages.py
python others/instagram-plot-activity.py
python others/instagram-plot-chat.py
python others/instagram-plot-comments.py
This creates informative plots, wordclouds and dashboards based on your searches and views:
python youtube-highlights.py
python youtube-playlists.py
(this simply copies your uploaded videos to a target folder)
python youtube-myvideos.py
python daylio-smoking.py
Request your data from Settings → Your Activity → Download Your Information (JSON format).
Key files:
- `saved/saved_posts.json` - Your saved posts
- `likes/liked_posts.json` - Your liked posts
- `messages/` - Your conversations
- `comments/` - Your comments
Request it as part of your Google Takeout.
Key files (sorry, it's in Italian 🇮🇹):
- `YouTube e YouTube Music/cronologia/cronologia visualizzazioni.json` - Viewing history
- `YouTube e YouTube Music/cronologia/cronologia delle ricerche.json` - Search history
- `YouTube e YouTube Music/playlist` - Your playlists
- `YouTube e YouTube Music/video/` - Your uploaded videos
- `YouTube e YouTube Music/commenti*` - Your comments
- `YouTube e YouTube Music/music (library and uploads)/music library songs.csv` - Your music
The takeout does not contain the playlist of liked videos, but it can be downloaded
as .csv through the web UI using the following instructions:
Go to https://www.youtube.com/playlist?list=LL
Scroll down to the bottom to load the full playlist. You can use this piece of code ([authored by McBear Holden](https://stackoverflow.com/questions/57868201/how-to-scroll-youtube-playlist-with-javascript)):
// Every 50 ms, jump to the current bottom of the playlist container so that
// YouTube keeps lazy-loading further entries.
setInterval(function scrollPlaylistToBottom() {
  const playlist = document.querySelector("ytd-playlist-video-list-renderer.style-scope");
  const bottom = playlist.scrollHeight;
  window.scrollTo(0, bottom);
}, 50);
Then, download the playlist as a CSV file with this script:
/**
 * Browser-console script: collects the videos listed on the current YouTube
 * page and downloads them as `youtube_export.csv`.
 *
 * Two page layouts are handled: playlist pages (URL contains
 * `/playlist?list=`) and pages built from `yt-lockup-view-model` elements.
 * One CSV row is written per matched item, with the columns Title,
 * Channel Name, Channel Link, Video Link, ID video, Views and Date Posted.
 */
(async function() {
  // The page type decides which set of selectors is used everywhere below.
  const saving_playlist = window.location.href.includes('/playlist?list=');

  // Bullet (U+2022) surrounded by spaces, used to split the lockup metadata
  // text into views and date. Written as an escape so the literal cannot be
  // corrupted when this snippet is copied through a mis-decoded page.
  const META_SEPARATOR = ' \u2022 ';

  const all_contents = saving_playlist
    ? document.querySelectorAll('div#contents > ytd-playlist-video-renderer > div#content > div#container > div#meta')
    : document.querySelectorAll('#content > yt-lockup-view-model > div > div > yt-lockup-metadata-view-model > div.yt-lockup-metadata-view-model-wiz__text-container');

  /**
   * Returns the value of the `v` query parameter of a watch URL.
   * @param {string} url - Video link, possibly empty.
   * @returns {string} The video ID, or '' when the URL has no `v` parameter.
   */
  function extract_video_id(url) {
    // Stop at '&' (next parameter) or '#' (fragment) so neither ends up in the ID.
    const match = url.match(/[?&]v=([^&#]+)/);
    return match ? match[1] : '';
  }

  /**
   * @param {Element} item - One metadata container from `all_contents`.
   * @returns {string} Trimmed title text, or '[Video Unavailable]' when the
   *   title anchor is missing.
   */
  function get_title(item) {
    const el = item.querySelector('h3 > a');
    return el ? el.innerText.trim() : '[Video Unavailable]';
  }

  /**
   * @param {Element} item - One metadata container from `all_contents`.
   * @returns {string} The `href` of the item's link anchor, or '' when absent.
   */
  function get_link(item) {
    const selector = saving_playlist
      ? 'h3 > a'
      : 'div > yt-content-metadata-view-model > div:last-child > span > span > a';
    const el = item.querySelector(selector);
    return el ? el.href : '';
  }

  /**
   * @param {Element} item - One metadata container from `all_contents`.
   * @returns {string} Trimmed channel name, or '' when it cannot be found.
   */
  function get_channel_name(item) {
    const selector = saving_playlist
      ? 'ytd-video-meta-block #byline-container ytd-channel-name'
      : 'div > yt-content-metadata-view-model > div:nth-of-type(1) > span';
    const el = item.querySelector(selector);
    return el ? el.innerText.trim() : '';
  }

  /**
   * @param {Element} item - One metadata container from `all_contents`.
   * @returns {string} The `href` of the channel anchor, or '' when absent.
   */
  function get_channel_link(item) {
    const selector = saving_playlist
      ? 'ytd-video-meta-block #byline-container ytd-channel-name a'
      : 'div > yt-content-metadata-view-model > div:nth-of-type(1) > span > span > a';
    const el = item.querySelector(selector);
    return el ? el.href : '';
  }

  /**
   * Reads the view count and the date text of an item.
   * @param {Element} item - One metadata container from `all_contents`.
   * @returns {{views: string, date: string}} Both fields are '' when the
   *   expected markup or separator is not found.
   */
  function get_views_and_date(item) {
    let views = '';
    let date = '';
    if (saving_playlist) {
      // Uses the first and third span of #video-info; requires at least three.
      const spans = item.querySelectorAll('ytd-video-meta-block #video-info span');
      if (spans.length >= 3) {
        views = spans[0].innerText.trim();
        date = spans[2].innerText.trim();
      }
    } else {
      const meta = item.querySelector('div > yt-content-metadata-view-model');
      if (meta) {
        const parts = meta.innerText.split(META_SEPARATOR);
        if (parts.length >= 2) {
          views = parts[0].trim();
          date = parts[1].trim();
        }
      }
    }
    return { views, date };
  }

  /**
   * Quotes a value for CSV: wraps it in double quotes and doubles any
   * embedded double quote. Falsy values become an empty quoted field.
   * @param {*} str - Value to serialize.
   * @returns {string} The quoted CSV field.
   */
  function escapeCSV(str) {
    return `"${String(str || '').replace(/"/g, '""')}"`;
  }

  let csv = `"Title","Channel Name","Channel Link","Video Link","ID video","Views","Date Posted"\n`;
  for (const item of all_contents) {
    const link = get_link(item);
    const video_id = extract_video_id(link);
    const title = get_title(item);
    const channel_name = get_channel_name(item);
    const channel_link = get_channel_link(item);
    const { views, date } = get_views_and_date(item);
    csv += [
      escapeCSV(title),
      escapeCSV(channel_name),
      escapeCSV(channel_link),
      escapeCSV(link),
      escapeCSV(video_id),
      escapeCSV(views),
      escapeCSV(date)
    ].join(',') + '\n';
  }

  // Download CSV: a temporary anchor pointing at a Blob URL is clicked
  // programmatically, then the anchor and the URL are released.
  const blob = new Blob([csv], { type: 'text/csv' });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = 'youtube_export.csv';
  document.body.appendChild(a);
  a.click();
  document.body.removeChild(a);
  URL.revokeObjectURL(url);
  console.log('CSV export completed!');
})();
Thanks to @evdokimovm for the starter script.
Other utilities:
🔒 Privacy: Your takeout data contains sensitive information. Keep it secure and don't share it publicly.
📊 Data Quality: Takeout data formats may change. Please report an issue if the scripts stop working.
Both Instagram and YouTube scripts use a blacklist system to avoid repeatedly trying to download unavailable content:
- Instagram: URLs that return "410 Gone" or "400 Bad Request" errors
- YouTube: Video IDs that are "Private", "Unavailable", or have access restrictions
Blacklist files are stored in cache/:
- `cache/instagram-blacklist.json` - List of problematic Instagram URLs
- `cache/youtube-blacklist.json` - List of problematic YouTube video IDs
You can manually edit these files to remove entries if content becomes available again.
- Fork the repository
- Create a feature branch
- Make your changes
- Add tests if applicable
- Submit a pull request
MIT License - see LICENSE file for details.
- Improve EXIF metadata integration
- Add support for more platforms (Facebook, Spotify, WhatsApp, Last.fm, etc.)
- Create web interface for analytics
- Add data validation and integrity checks
- Implement incremental updates
- Create Docker container for easy deployment