From c7bd102c8837a697902b9c5bd7b5ad86780050e0 Mon Sep 17 00:00:00 2001
From: "Phyks (Lucas Verney)" <phyks@phyks.me>
Date: Wed, 15 Jun 2016 17:27:52 +0200
Subject: [PATCH] Working version with Ampache backend

---
 backends/ampacheSQL.py | 65 +++++++++++++++++++++++++++++++++++++++++-
 helpers/title.py       | 61 ++++++++++++++++++++-------------------
 youtube2local.py       | 20 ++++++++++---
 3 files changed, 111 insertions(+), 35 deletions(-)

diff --git a/backends/ampacheSQL.py b/backends/ampacheSQL.py
index 5795634..621c656 100644
--- a/backends/ampacheSQL.py
+++ b/backends/ampacheSQL.py
@@ -4,7 +4,7 @@ Checks if a song exist against Ampache SQL database directly.
 import MySQLdb
 import MySQLdb.cursors
 
-def check(str, config):
+def check_title(str, config):
     """
     Check if a song is in the Ampache catalog.
 
@@ -36,3 +36,66 @@ def check(str, config):
               AGAINST(%s IN BOOLEAN MODE)
               ORDER BY score DESC LIMIT 1""", (str, str,))
     return c.fetchone()
+
+
+def check_metadata(metadata, config):
+    """
+    Check if a song is in the Ampache catalog.
+
+    Fields longer than 3 characters add a fulltext MATCH term to the score,
+    shorter ones become LIKE filters joined with AND. Empty or missing
+    fields are ignored; with nothing left to search, None is returned.
+
+    Params:
+        - metadata, an artist/title/album dictionary. Album key is optional.
+        - config is a configuration dictionary.
+
+    Returns:
+        A dict containing the found infos about the song or None.
+    """
+    columns = (("artist", "artist.name"),
+               ("title", "song.title"),
+               ("album", "album.name"))
+    scores, score_args = [], []
+    filters, filter_args = [], []
+    for key, column in columns:
+        value = metadata.get(key)
+        if not value:
+            continue
+        if len(value) > 3:
+            scores.append("MATCH(" + column + ") AGAINST(%s IN BOOLEAN MODE)")
+            score_args.append(value)
+        else:
+            filters.append(column + " LIKE %s")
+            filter_args.append("%%%s%%" % (value,))
+    if not scores and not filters:
+        # Nothing to search for: do not report an arbitrary song as a match
+        return None
+
+    sql = """SELECT
+             song.id AS id,
+             artist.name AS artist,
+             album.name AS album,
+             song.title AS title,
+             song.file AS file,
+             ("""
+    sql += " + ".join(scores) if scores else "0"
+    sql += """) AS score
+              FROM song
+              LEFT JOIN artist ON song.artist = artist.id
+              LEFT JOIN album ON song.album = album.id"""
+    if filters:
+        # Emit a single WHERE: repeating the keyword per field is invalid SQL
+        sql += " WHERE " + " AND ".join(filters)
+    sql += " ORDER BY score DESC LIMIT 1"
+
+    db = MySQLdb.connect(user=config["db_user"], host=config["db_host"],
+                         passwd=config["db_password"], db=config["db_name"],
+                         cursorclass=MySQLdb.cursors.DictCursor)
+    try:
+        c = db.cursor()
+        # Placeholders occur in the query as score terms first, then filters
+        c.execute(sql, tuple(score_args + filter_args))
+        return c.fetchone()
+    finally:
+        db.close()
diff --git a/helpers/title.py b/helpers/title.py
index d3ad6c1..db2a53e 100644
--- a/helpers/title.py
+++ b/helpers/title.py
@@ -1,5 +1,8 @@
 """
 Helper to clean Youtube titles, removing usual junk.
+
+Adapted from
+https://github.com/david-sabata/web-scrobbler/blob/master/connectors/v2/youtube.js
 """
 import re
 
@@ -31,10 +34,7 @@ def split(yt_title):
     # Find separator
     separator = find_separator(yt_title)
     if separator is None or len(yt_title) == 0:
-        return {
-            "artist": None,
-            "title": None
-        }
+        return None
 
     # Split artist and title
     artist = yt_title[0:separator["index"]]
@@ -46,7 +46,8 @@ def split(yt_title):
 
     return {
         "artist": artist,
-        "title": title
+        "title": title,
+        "album": None
     }
 
 
@@ -54,49 +55,49 @@ def clean(title):
     """
     Remove usual junk from a Youtube title.
     """
-    title = re.sub(r"/^\s+|\s+$/g", '', title)
+    title = re.sub(r"^\s+|\s+$", '', title)
     # **NEW**
-    title = re.sub(r"/\s*\*+\s?\S+\s?\*+$/", '', title)
+    title = re.sub(r"\s*\*+\s?\S+\s?\*+$", '', title)
     # [whatever]
-    title = re.sub(r"/\s*\[[^\]]+\]$/", '', title)
+    title = re.sub(r"\[[^\]]+\]$", '', title)
     # (whatever version)
-    title = re.sub(r"/\s*\([^\)]*version\)$/i", '', title)
+    title = re.sub(r"(?i)\s*\([^\)]*version\)$", '', title)
     # video extensions
-    title = re.sub(r"/\s*\.(avi|wmv|mpg|mpeg|flv)$/i", '', title)
+    title = re.sub(r"(?i)\s*\.(avi|wmv|mpg|mpeg|flv)$", '', title)
     # (LYRIC VIDEO)
-    title = re.sub(r"/\s*(LYRIC VIDEO\s*)?(lyric video\s*)/i", '', title)
+    title = re.sub(r"(?i)(LYRIC VIDEO\s*)?(lyric video\s*)", '', title)
     # (Official title Stream)
-    title = re.sub(r"/\s*(Official title Stream*)/i", '', title)
+    title = re.sub(r"(?i)(Official title Stream*)", '', title)
     # (official)? (music)? video
-    title = re.sub(r"/\s*(of+icial\s*)?(music\s*)?video/i", '', title)
+    title = re.sub(r"(?i)(of+icial\s*)?(music\s*)?video", '', title)
     # (official)? (music)? audio
-    title = re.sub(r"/\s*(of+icial\s*)?(music\s*)?audio/i", '', title)
+    title = re.sub(r"(?i)\s*(of+icial\s*)?(music\s*)?audio", '', title)
     # (ALBUM title)
-    title = re.sub(r"/\s*(ALBUM title\s*)?(album title\s*)/i", '', title)
+    title = re.sub(r"(?i)\s*(ALBUM title\s*)?(album title\s*)", '', title)
     # (Cover Art)
-    title = re.sub(r"/\s*(COVER ART\s*)?(Cover Art\s*)/i", '', title)
+    title = re.sub(r"(?i)\s*(COVER ART\s*)?(Cover Art\s*)", '', title)
     # (official)
-    title = re.sub(r"/\s*\(\s*of+icial\s*\)/i", '', title)
+    title = re.sub(r"(?i)\s*\(\s*of+icial\s*\)", '', title)
     # (1999)
-    title = re.sub(r"/\s*\(\s*[0-9]{4}\s*\)/i", '', title)
+    title = re.sub(r"(?i)\s*\(\s*[0-9]{4}\s*\)", '', title)
     # HD (HQ)
-    title = re.sub(r"/\s+\(\s*(HD|HQ)\s*\)$/", '', title)
+    title = re.sub(r"\s+\(\s*(HD|HQ)\s*\)$", '', title)
     # HD (HQ)
-    title = re.sub(r"/\s+(HD|HQ)\s*$/", '', title)
+    title = re.sub(r"\s+(HD|HQ)\s*$", '', title)
     # video clip
-    title = re.sub(r"/\s*video\s*clip/i", '', title)
+    title = re.sub(r"(?i)\s*video\s*clip", '', title)
     # Full Album
-    title = re.sub(r"/\s*full\s*album/i", '', title)
+    title = re.sub(r"(?i)\s*full\s*album", '', title)
     # live
-    title = re.sub(r"/\s+\(?live\)?$/i", '', title)
+    title = re.sub(r"(?i)\s+\(?live\)?$", '', title)
     # Leftovers after e.g. (official video)
-    title = re.sub(r"/\(+\s*\)+/", '', title)
-    # Artist - The new "title title" featuring someone
-    title = re.sub(r"/^(|.*\s)\"(.*)\"(\s.*|)$/", '\2', title)
+    title = re.sub(r"\(+\s*\)+", '', title)
+    # Remove featurings
+    title = re.sub(r"\(feat\. .*?\)", '', title)
     # 'title title'
-    title = re.sub(r"/^(|.*\s)'(.*)'(\s.*|)$/", '\2', title)
-    # trim white chars and dash
-    title.lstrip(" \t\n\r-")
-    title.rstrip(" \t\n\r-")
+    title = re.sub(r"^(|.*\s)'(.*)'(\s.*|)$", r'\2', title)
+    # trim white chars, dash and quotes (one pass, so mixed runs are removed)
+    title = title.strip()
+    title = title.strip(" \t\n\r-\"'")
 
     return title
diff --git a/youtube2local.py b/youtube2local.py
index bb2d2b1..65a70b6 100755
--- a/youtube2local.py
+++ b/youtube2local.py
@@ -62,12 +62,24 @@ def match(youtube_url):
     # Parse every song
     songs = []
     for entry in result["entries"]:
-        # Clean the entry title
-        yt_title = title.clean(entry["title"])
+        # Try to fetch artist/title metadata from the raw Youtube title
+        metadata = title.split(entry["title"])
+        if metadata is not None:
+            # Try to find a match from the parsed metadata
+            song_match = backend.check_metadata(metadata, config.config)
+            if song_match is not None:
+                songs.append({
+                    "match": song_match,
+                    "yt_title": entry["title"],
+                    "url": entry["webpage_url"]
+                })
+                # Matched on metadata, skip the title-based fallback below
+                continue
         # Add the song to the list, with its eventual match
         songs.append({
-            "match": backend.check(yt_title, config.config),
-            "yt_title": yt_title,
+            "match": backend.check_title(title.clean(entry["title"]),
+                                         config.config),
+            "yt_title": entry["title"],
             "url": entry["webpage_url"]
         })
     return songs