Files
play-dl-test/play-dl/YouTube/utils/parser.ts

244 lines
9.3 KiB
TypeScript
Raw Normal View History

2021-09-27 22:20:50 +05:30
import { YouTubeVideo } from '../classes/Video';
import { YouTubePlayList } from '../classes/Playlist';
import { YouTubeChannel } from '../classes/Channel';
2021-09-29 20:23:16 +05:30
import { YouTube } from '..';
import { YouTubeThumbnail } from '../classes/Thumbnail';
/**
 * Query-string markers that identify a blurred thumbnail URL.
 * `unblurThumbnail` strips the query string of any thumbnail URL that contains one of these.
 */
const BLURRED_THUMBNAILS: readonly string[] = [
    '-oaymwEpCOADEI4CSFryq4qpAxsIARUAAAAAGAElAADIQj0AgKJDeAHtAZmZGUI=',
    '-oaymwEiCOADEI4CSFXyq4qpAxQIARUAAIhCGAFwAcABBu0BmZkZQg==',
    '-oaymwEiCOgCEMoBSFXyq4qpAxQIARUAAIhCGAFwAcABBu0BZmbmQQ==',
    '-oaymwEiCNAFEJQDSFXyq4qpAxQIARUAAIhCGAFwAcABBu0BZmZmQg==',
    '-oaymwEdCNAFEJQDSFryq4qpAw8IARUAAIhCGAHtAWZmZkI=',
    '-oaymwEdCNACELwBSFryq4qpAw8IARUAAIhCGAHtAT0K10E='
];
2021-08-12 13:28:17 +05:30
2021-08-12 15:58:55 +05:30
/**
 * Options accepted by `ParseSearchResult`.
 */
export interface ParseSearchInterface {
    /** Kind of result to extract; `ParseSearchResult` uses `'video'` when this is not set. */
    type?: 'video' | 'playlist' | 'channel';
    /** Maximum number of results to return; a missing or non-positive value means no limit. */
    limit?: number;
    /**
     * Language setting.
     * NOTE(review): not read by the parser in this file - presumably consumed by the code
     * that issues the search request; confirm against the caller.
     */
    language?: string;
    /**
     * When true, thumbnails of parsed videos and playlists are passed through the
     * un-blur step. Treated as false when not set.
     */
    unblurNSFWThumbnails?: boolean;
}
2021-08-12 13:28:17 +05:30
2021-09-17 14:36:32 +05:30
/**
 * Plain description of a single thumbnail image.
 * The name and the string-typed dimensions are kept as they are for compatibility
 * with existing importers of this interface.
 */
export interface thumbnail {
    /** Width of the image, carried as a string. */
    width: string;
    /** Height of the image, carried as a string. */
    height: string;
    /** Location of the image. */
    url: string;
}
2021-09-29 20:23:16 +05:30
/**
 * Main command which converts html body data and returns the type of data requested.
 *
 * The JSON assigned to `var ytInitialData` is cut out of the page and parsed, then every
 * item found in an `itemSectionRenderer` is handed to the parser matching the requested type.
 * Items rendered as a different kind than the requested one are skipped.
 * The caller's `options` object is only read, never modified.
 *
 * @param html body of that request
 * @param options limit & type of YouTube search you want. `type` falls back to `'video'`;
 * a missing or non-positive `limit` means "no limit".
 * @returns Array of one of YouTube type.
 * @throws Error when `html` is empty, when no `ytInitialData` payload can be located, or
 * (once a result item is reached) when `options.type` is not a known search type.
 * Errors raised by `JSON.parse` on a malformed payload propagate unchanged.
 */
export function ParseSearchResult(html: string, options?: ParseSearchInterface): YouTube[] {
    if (!html) throw new Error("Can't parse Search result without data");

    // Resolve the settings into locals instead of writing defaults back into `options`.
    const type = options?.type || 'video';
    const limit = options?.limit;
    const hasLimit = typeof limit === 'number' && limit > 0;
    const unblur = Boolean(options?.unblurNSFWThumbnails);

    // Take the text after the assignment, stop at the end of the script tag, and also stop
    // at a following `;var|const|let ` statement if one is present.
    const data = html
        .split('var ytInitialData = ')[1]
        ?.split(';</script>')[0]
        .split(/;\s*(var|const|let)\s/)[0];
    if (!data) throw new Error('Failed to find ytInitialData in the search result html');

    const json_data = JSON.parse(data);
    // Sections without an itemSectionRenderer contribute `undefined`, filtered out below.
    const details =
        json_data.contents.twoColumnSearchResultsRenderer.primaryContents.sectionListRenderer.contents.flatMap(
            (section: any) => section.itemSectionRenderer?.contents
        );

    const results: YouTube[] = [];
    for (const detail of details) {
        if (hasLimit && results.length === limit) break;
        if (!detail || (!detail.videoRenderer && !detail.channelRenderer && !detail.playlistRenderer)) continue;

        switch (type) {
            case 'video': {
                // A channel or playlist card inside a video search is not an error: skip it.
                if (!detail.videoRenderer) break;
                const parsed = parseVideo(detail);
                if (parsed) {
                    if (unblur) parsed.thumbnails.forEach((thumb) => unblurThumbnail(thumb));
                    results.push(parsed);
                }
                break;
            }
            case 'channel': {
                if (!detail.channelRenderer) break;
                const parsed = parseChannel(detail);
                if (parsed) results.push(parsed);
                break;
            }
            case 'playlist': {
                if (!detail.playlistRenderer) break;
                const parsed = parsePlaylist(detail);
                if (parsed) {
                    if (unblur && parsed.thumbnail) unblurThumbnail(parsed.thumbnail);
                    results.push(parsed);
                }
                break;
            }
            default:
                throw new Error(`Unknown search type: ${type}`);
        }
    }
    return results;
}
2021-09-29 20:23:16 +05:30
/**
 * Function to convert [hour : minutes : seconds] format to seconds
 *
 * Accepts `h:m:s`, `m:s` or a bare number of seconds. For any other number of
 * components only the first one is used, as before.
 *
 * @param duration hour : minutes : seconds format
 * @returns seconds; 0 when `duration` is empty or contains a component that is not a number
 */
function parseDuration(duration: string): number {
    if (!duration) return 0;

    // Explicit radix so components are always read as decimal.
    const parts = duration.split(':').map((part) => parseInt(part, 10));

    let seconds: number;
    switch (parts.length) {
        case 3:
            seconds = parts[0] * 60 * 60 + parts[1] * 60 + parts[2];
            break;
        case 2:
            seconds = parts[0] * 60 + parts[1];
            break;
        default:
            seconds = parts[0];
    }

    // Non-numeric text (for example a "LIVE" label) would otherwise leak NaN to callers.
    return Number.isNaN(seconds) ? 0 : seconds;
}
2021-09-29 20:23:16 +05:30
/**
 * Function to parse Channel searches
 *
 * The last entry of the renderer's thumbnail list is used as the channel icon.
 *
 * @param data body of that channel request.
 * @returns YouTubeChannel class
 * @throws Error when `data` is missing or carries no `channelRenderer`
 */
export function parseChannel(data?: any): YouTubeChannel {
    if (!data || !data.channelRenderer) throw new Error('Failed to Parse YouTube Channel');

    const renderer = data.channelRenderer;
    const badge = renderer.ownerBadges?.[0]?.metadataBadgeRenderer?.style?.toLowerCase();
    const url = `https://www.youtube.com${
        renderer.navigationEndpoint.browseEndpoint.canonicalBaseUrl ||
        renderer.navigationEndpoint.commandMetadata.webCommandMetadata.url
    }`;

    const thumbnails = renderer.thumbnail.thumbnails;
    const thumbnail = thumbnails[thumbnails.length - 1];
    // Only protocol-relative URLs get a scheme. A blanket replace('//', 'https://') would
    // turn an already absolute "https://host/..." into "https:https://host/...".
    const iconUrl: string = thumbnail.url.startsWith('//') ? `https:${thumbnail.url}` : thumbnail.url;

    return new YouTubeChannel({
        id: renderer.channelId,
        name: renderer.title.simpleText,
        icon: {
            url: iconUrl,
            width: thumbnail.width,
            height: thumbnail.height
        },
        url: url,
        verified: Boolean(badge?.includes('verified')),
        artist: Boolean(badge?.includes('artist')),
        subscribers: renderer.subscriberCountText?.simpleText ?? '0 subscribers'
    });
}
2021-09-29 20:23:16 +05:30
/**
 * Function to parse Video searches
 *
 * A video without `lengthText` is reported as live with a duration of 0.
 *
 * @param data body of that video request.
 * @returns YouTubeVideo class
 * @throws Error when `data` is missing or carries no `videoRenderer`
 */
export function parseVideo(data?: any): YouTubeVideo {
    if (!data || !data.videoRenderer) throw new Error('Failed to Parse YouTube Video');

    const renderer = data.videoRenderer;
    const channel = renderer.ownerText.runs[0];
    const badge = renderer.ownerBadges?.[0]?.metadataBadgeRenderer?.style?.toLowerCase();
    const durationText = renderer.lengthText;
    // The snippet list may be absent or empty, and a snippet may come without text runs;
    // all of those cases yield an empty description instead of a TypeError.
    const snippetRuns = renderer.detailedMetadataSnippets?.[0]?.snippetText?.runs;

    return new YouTubeVideo({
        id: renderer.videoId,
        url: `https://www.youtube.com/watch?v=${renderer.videoId}`,
        title: renderer.title.runs[0].text,
        description: snippetRuns?.length ? snippetRuns.map((run: any) => run.text).join('') : '',
        duration: durationText ? parseDuration(durationText.simpleText) : 0,
        duration_raw: durationText ? durationText.simpleText : null,
        thumbnails: renderer.thumbnail.thumbnails,
        channel: {
            id: channel.navigationEndpoint.browseEndpoint.browseId || null,
            name: channel.text || null,
            url: `https://www.youtube.com${
                channel.navigationEndpoint.browseEndpoint.canonicalBaseUrl ||
                channel.navigationEndpoint.commandMetadata.webCommandMetadata.url
            }`,
            icons: renderer.channelThumbnailSupportedRenderers.channelThumbnailWithLinkRenderer.thumbnail.thumbnails,
            verified: Boolean(badge?.includes('verified')),
            artist: Boolean(badge?.includes('artist'))
        },
        uploadedAt: renderer.publishedTimeText?.simpleText ?? null,
        // startTime is multiplied by 1000 before being handed to Date.
        upcoming: renderer.upcomingEventData?.startTime
            ? new Date(parseInt(renderer.upcomingEventData.startTime, 10) * 1000)
            : undefined,
        // Digits of the view-count text (still a string); 0 when there is no such text.
        views: renderer.viewCountText?.simpleText?.replace(/\D/g, '') ?? 0,
        live: !durationText
    });
}
2021-09-29 20:23:16 +05:30
/**
 * Function to parse Playlist searches
 *
 * The last entry of the first thumbnail group is used as the playlist thumbnail.
 * Channel fields are `undefined` when the byline carries no usable run.
 *
 * @param data body of that playlist request.
 * @returns YouTubePlaylist class
 * @throws Error when `data` is missing or carries no `playlistRenderer`
 */
export function parsePlaylist(data?: any): YouTubePlayList {
    if (!data || !data.playlistRenderer) throw new Error('Failed to Parse YouTube Playlist');

    const renderer = data.playlistRenderer;
    const candidates = renderer.thumbnails[0].thumbnails;
    const thumbnail = candidates[candidates.length - 1];
    const channel = renderer.shortBylineText.runs?.[0];
    const endpoint = channel?.navigationEndpoint;
    const channelPath = endpoint?.commandMetadata.webCommandMetadata.url;
    // Digits of the count text; 0 when the text is missing or holds no digits.
    const videoCount = parseInt(String(renderer.videoCount ?? '').replace(/\D/g, ''), 10) || 0;

    return new YouTubePlayList(
        {
            id: renderer.playlistId,
            title: renderer.title.simpleText,
            thumbnail: {
                id: renderer.playlistId,
                url: thumbnail.url,
                height: thumbnail.height,
                width: thumbnail.width
            },
            channel: {
                id: endpoint?.browseEndpoint?.browseId,
                name: channel?.text,
                // Without a channel path the URL stays undefined instead of becoming
                // the literal "https://www.youtube.comundefined".
                url: channelPath ? `https://www.youtube.com${channelPath}` : undefined
            },
            videos: videoCount
        },
        true
    );
}
/**
 * Rewrites a blurred thumbnail in place.
 *
 * When the URL contains one of the `BLURRED_THUMBNAILS` markers, the query string is
 * dropped and width/height are reset from the image file name. Thumbnails without a
 * marker are left untouched.
 *
 * @param thumbnail thumbnail to update in place
 */
function unblurThumbnail(thumbnail: YouTubeThumbnail) {
    const isBlurred = BLURRED_THUMBNAILS.some((marker) => thumbnail.url.includes(marker));
    if (!isBlurred) return;

    thumbnail.url = thumbnail.url.split('?')[0];

    // we need to update the size parameters as the sqp parameter also included a cropped size
    const pathSegments = thumbnail.url.split('/');
    const variant = pathSegments[pathSegments.length - 1].split('.')[0];
    const knownSizes = new Map<string, [number, number]>([
        ['hq2', [480, 360]],
        ['hqdefault', [480, 360]],
        ['hq720', [1280, 720]],
        ['sddefault', [640, 480]],
        ['mqdefault', [320, 180]],
        ['default', [120, 90]]
    ]);

    const size = knownSizes.get(variant);
    if (size !== undefined) {
        thumbnail.width = size[0];
        thumbnail.height = size[1];
        return;
    }

    // Unrecognised file name: the real size is not known.
    thumbnail.height = NaN;
    thumbnail.width = NaN;
}