From c7bd102c8837a697902b9c5bd7b5ad86780050e0 Mon Sep 17 00:00:00 2001
From: "Phyks (Lucas Verney)"
Date: Wed, 15 Jun 2016 17:27:52 +0200
Subject: [PATCH] Working version with Ampache backend

---
Review notes (this section is not part of the commit message):
 * backends/ampacheSQL.py, check_metadata: the LIKE fallbacks for short
   fields are collected and joined with AND, so two short fields no longer
   emit a second WHERE keyword (which was invalid SQL).
 * helpers/title.py, clean: removed the stray JavaScript "g" flag from the
   whitespace-trim pattern, made the quoted-title replacement a raw r'\2'
   backreference (a plain '\2' is the control character \x02), and strip
   whitespace again after removing leading/trailing dashes and quotes.
 * youtube2local.py, match: removed the trailing comma that wrapped the
   check_metadata() result in a 1-tuple, and test song_match instead of the
   enclosing match() function, so the check_title() fallback is reachable.
 * NOTE(review): line structure was restored from a whitespace-collapsed
   copy. Indentation of context lines and the position of one blank
   context line in the clean() hunk are reconstructed, not verified;
   if the patch does not apply cleanly, try
   `git apply --ignore-whitespace`.

 backends/ampacheSQL.py | 65 +++++++++++++++++++++++++++++++++++++++++-
 helpers/title.py       | 61 ++++++++++++++++++++-------------------
 youtube2local.py       | 20 ++++++++++---
 3 files changed, 111 insertions(+), 35 deletions(-)

diff --git a/backends/ampacheSQL.py b/backends/ampacheSQL.py
index 5795634..621c656 100644
--- a/backends/ampacheSQL.py
+++ b/backends/ampacheSQL.py
@@ -4,7 +4,7 @@ Checks if a song exist against Ampache SQL database directly.
 import MySQLdb
 import MySQLdb.cursors
 
-def check(str, config):
+def check_title(str, config):
     """
     Check if a song is in the Ampache catalog.
 
@@ -36,3 +36,66 @@ def check(str, config):
                  AGAINST(%s IN BOOLEAN MODE)
                  ORDER BY score DESC LIMIT 1""", (str, str,))
     return c.fetchone()
+
+
+def check_metadata(metadata, config):
+    """
+    Check if a song is in the Ampache catalog.
+
+    Params:
+        - metadata, an artist/title/album dictionary. Album key is optional.
+        - config is a configuration dictionary.
+
+    Returns:
+        A dict containing the found infos about the song or None.
+    """
+    db = MySQLdb.connect(user=config["db_user"], host=config["db_host"],
+                         passwd=config["db_password"],
+                         db=config["db_name"],
+                         cursorclass=MySQLdb.cursors.DictCursor)
+
+    c = db.cursor()
+
+    sql = """SELECT
+                song.id AS id,
+                artist.name AS artist,
+                album.name AS album,
+                song.title AS title,
+                song.file AS file,
+                ("""
+    args = ()
+    if len(metadata["artist"]) > 3:
+        sql += "MATCH(artist.name) AGAINST(%s IN BOOLEAN MODE)"
+        args += (metadata["artist"],)
+    else:
+        sql += "0"
+
+    if len(metadata["title"]) > 3:
+        sql += " + MATCH(song.title) AGAINST(%s IN BOOLEAN MODE)"
+        args += (metadata["title"],)
+
+    if metadata.get("album") is not None and len(metadata["album"]) > 3:
+        sql += " + MATCH(album.name) AGAINST(%s IN BOOLEAN MODE)"
+        args += (metadata["album"],)
+
+    sql += """) AS score
+             FROM song
+             LEFT JOIN artist ON song.artist = artist.id
+             LEFT JOIN album ON song.album = album.id"""
+
+    where = []
+    if len(metadata["artist"]) <= 3:
+        where.append("artist.name LIKE %s")
+        args += ("%%%s%%" % (metadata["artist"],),)
+    if len(metadata["title"]) <= 3:
+        where.append("song.title LIKE %s")
+        args += ("%%%s%%" % (metadata["title"],),)
+    if metadata.get("album") is not None and len(metadata["album"]) <= 3:
+        where.append("album.name LIKE %s")
+        args += ("%%%s%%" % (metadata["album"],),)
+    if where:
+        sql += " WHERE " + " AND ".join(where)
+    sql += " ORDER BY score DESC LIMIT 1"
+
+    c.execute(sql, args)
+    return c.fetchone()
diff --git a/helpers/title.py b/helpers/title.py
index d3ad6c1..db2a53e 100644
--- a/helpers/title.py
+++ b/helpers/title.py
@@ -1,5 +1,8 @@
 """
 Helper to clean Youtube titles, removing usual junk.
+
+Adapted from
+https://github.com/david-sabata/web-scrobbler/blob/master/connectors/v2/youtube.js
 """
 import re
 
@@ -31,10 +34,7 @@ def split(yt_title):
     # Find separator
     separator = find_separator(yt_title)
     if separator is None or len(yt_title) == 0:
-        return {
-            "artist": None,
-            "title": None
-        }
+        return None
 
     # Split artist and title
     artist = yt_title[0:separator["index"]]
@@ -46,7 +46,8 @@ def split(yt_title):
 
     return {
         "artist": artist,
-        "title": title
+        "title": title,
+        "album": None
     }
 
 
@@ -54,49 +55,49 @@ def clean(title):
     """
     Remove usual junk from a Youtube title.
     """
-    title = re.sub(r"/^\s+|\s+$/g", '', title)
+    title = re.sub(r"^\s+|\s+$", '', title)
     # **NEW**
-    title = re.sub(r"/\s*\*+\s?\S+\s?\*+$/", '', title)
+    title = re.sub(r"\s*\*+\s?\S+\s?\*+$", '', title)
     # [whatever]
-    title = re.sub(r"/\s*\[[^\]]+\]$/", '', title)
+    title = re.sub(r"\[[^\]]+\]$", '', title)
     # (whatever version)
-    title = re.sub(r"/\s*\([^\)]*version\)$/i", '', title)
+    title = re.sub(r"(?i)\s*\([^\)]*version\)$", '', title)
     # video extensions
-    title = re.sub(r"/\s*\.(avi|wmv|mpg|mpeg|flv)$/i", '', title)
+    title = re.sub(r"(?i)\s*\.(avi|wmv|mpg|mpeg|flv)$", '', title)
     # (LYRIC VIDEO)
-    title = re.sub(r"/\s*(LYRIC VIDEO\s*)?(lyric video\s*)/i", '', title)
+    title = re.sub(r"(?i)(LYRIC VIDEO\s*)?(lyric video\s*)", '', title)
     # (Official title Stream)
-    title = re.sub(r"/\s*(Official title Stream*)/i", '', title)
+    title = re.sub(r"(?i)(Official title Stream*)", '', title)
     # (official)? (music)? video
-    title = re.sub(r"/\s*(of+icial\s*)?(music\s*)?video/i", '', title)
+    title = re.sub(r"(?i)(of+icial\s*)?(music\s*)?video", '', title)
     # (official)? (music)? audio
-    title = re.sub(r"/\s*(of+icial\s*)?(music\s*)?audio/i", '', title)
+    title = re.sub(r"(?i)\s*(of+icial\s*)?(music\s*)?audio", '', title)
     # (ALBUM title)
-    title = re.sub(r"/\s*(ALBUM title\s*)?(album title\s*)/i", '', title)
+    title = re.sub(r"(?i)\s*(ALBUM title\s*)?(album title\s*)", '', title)
     # (Cover Art)
-    title = re.sub(r"/\s*(COVER ART\s*)?(Cover Art\s*)/i", '', title)
+    title = re.sub(r"(?i)\s*(COVER ART\s*)?(Cover Art\s*)", '', title)
     # (official)
-    title = re.sub(r"/\s*\(\s*of+icial\s*\)/i", '', title)
+    title = re.sub(r"(?i)\s*\(\s*of+icial\s*\)", '', title)
     # (1999)
-    title = re.sub(r"/\s*\(\s*[0-9]{4}\s*\)/i", '', title)
+    title = re.sub(r"(?i)\s*\(\s*[0-9]{4}\s*\)", '', title)
     # HD (HQ)
-    title = re.sub(r"/\s+\(\s*(HD|HQ)\s*\)$/", '', title)
+    title = re.sub(r"\s+\(\s*(HD|HQ)\s*\)$", '', title)
     # HD (HQ)
-    title = re.sub(r"/\s+(HD|HQ)\s*$/", '', title)
+    title = re.sub(r"\s+(HD|HQ)\s*$", '', title)
     # video clip
-    title = re.sub(r"/\s*video\s*clip/i", '', title)
+    title = re.sub(r"(?i)\s*video\s*clip", '', title)
     # Full Album
-    title = re.sub(r"/\s*full\s*album/i", '', title)
+    title = re.sub(r"(?i)\s*full\s*album", '', title)
     # live
-    title = re.sub(r"/\s+\(?live\)?$/i", '', title)
+    title = re.sub(r"(?i)\s+\(?live\)?$", '', title)
     # Leftovers after e.g. (official video)
-    title = re.sub(r"/\(+\s*\)+/", '', title)
-    # Artist - The new "title title" featuring someone
-    title = re.sub(r"/^(|.*\s)\"(.*)\"(\s.*|)$/", '\2', title)
+    title = re.sub(r"\(+\s*\)+", '', title)
+    # Remove featurings
+    title = re.sub(r"\(feat\. .*?\)", '', title)
     # 'title title'
-    title = re.sub(r"/^(|.*\s)'(.*)'(\s.*|)$/", '\2', title)
-    # trim white chars and dash
-    title.lstrip(" \t\n\r-")
-    title.rstrip(" \t\n\r-")
+    title = re.sub(r"^(|.*\s)'(.*)'(\s.*|)$", r'\2', title)
+    # trim white chars, dash and quotes
+    title = title.strip()
+    title = title.strip("-\"'").strip()
 
     return title
diff --git a/youtube2local.py b/youtube2local.py
index bb2d2b1..65a70b6 100755
--- a/youtube2local.py
+++ b/youtube2local.py
@@ -62,12 +62,24 @@ def match(youtube_url):
     # Parse every song
     songs = []
     for entry in result["entries"]:
-        # Clean the entry title
-        yt_title = title.clean(entry["title"])
+        # Try to fetch metadata from the title
+        metadata = title.split(entry["title"])
+        if metadata is not None:
+            # Try to find a match
+            song_match = backend.check_metadata(metadata, config.config)
+            if song_match is not None:
+                songs.append({
+                    "match": song_match,
+                    "yt_title": entry["title"],
+                    "url": entry["webpage_url"]
+                })
+                # Go on with next song
+                continue
         # Add the song to the list, with its eventual match
         songs.append({
-            "match": backend.check(yt_title, config.config),
-            "yt_title": yt_title,
+            "match": backend.check_title(title.clean(entry["title"]),
+                                         config.config),
+            "yt_title": entry["title"],
             "url": entry["webpage_url"]
         })
     return songs