commit ffb9ae3ffaf494590f0d2e6dced463ad6716d444
Author: Phyks (Lucas Verney) <phyks@phyks.me>
Date:   Thu Jun 9 18:31:39 2016 +0200

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3570422
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+config.py
+__pycache__
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..303c9c2
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,8 @@
+The MIT License (MIT)
+Copyright (c) 2016 Phyks (Lucas Verney)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e0f6fa2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,28 @@
+Youtube2local
+=============
+
+This is a script to help you match Youtube music videos and playlists with
+your local song collection.
+
+
+## Usage
+
+* Install the required dependencies: `pip install -r requirements.txt`.
+* Copy the `config.py.example` file to `config.py` and edit it according to
+  your needs.
+* Run the script: `python3 -m youtube2local "https://www.youtube.com/watch?v=1mkUp1V3ys0"`.
+
+
+## Extending backends
+
+For now it only features a backend using the database from an Ampache
+installation. You can easily extend backends by adding files in the `backends`
+directory, implementing the `check` function as the `ampacheSQL.py` backend
+does. Then, just edit your config accordingly.
+
+Feel free to submit any PR for new backends.
+
+
+## LICENSE
+
+Released under MIT license.
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backends/ampacheSQL.py b/backends/ampacheSQL.py
new file mode 100644
index 0000000..5795634
--- /dev/null
+++ b/backends/ampacheSQL.py
@@ -0,0 +1,38 @@
+"""
+Checks if a song exists by querying the Ampache SQL database directly.
+"""
+import MySQLdb
+import MySQLdb.cursors
+
+def check(str, config):
+    """
+    Check if a song is in the Ampache catalog.
+
+    Params:
+        - str is a string to match against song names, artists, albums etc
+        - config is a configuration dictionary; the db_user, db_host,
+          db_password and db_name keys are used to reach the database.
+
+    Returns:
+        A dict containing the found infos about the best match or None.
+    """
+    db = MySQLdb.connect(user=config["db_user"], host=config["db_host"],
+                         passwd=config["db_password"], db=config["db_name"],
+                         cursorclass=MySQLdb.cursors.DictCursor)
+    try:
+        c = db.cursor()
+        c.execute("""SELECT
+                  song.id AS id, artist.name AS artist, album.name AS album,
+                  song.title AS title, song.file AS file,
+                  MATCH(artist.name, album.name, song.title)
+                  AGAINST(%s IN BOOLEAN MODE) AS score
+                  FROM song
+                  LEFT JOIN artist ON song.artist = artist.id
+                  LEFT JOIN album ON song.album = album.id
+                  WHERE MATCH(artist.name, album.name, song.title)
+                  AGAINST(%s IN BOOLEAN MODE)
+                  ORDER BY score DESC LIMIT 1""", (str, str,))
+        return c.fetchone()
+    finally:
+        # A connection is opened on every call: always release it
+        db.close()
diff --git a/config.py.example b/config.py.example
new file mode 100644
index 0000000..fe3756d
--- /dev/null
+++ b/config.py.example
@@ -0,0 +1,13 @@
+"""
+Example configuration file.
+"""
+
+config = {
+    # Name of one of the modules available in the backends folder
+    "backend": "ampacheSQL",
+    # ampacheSQL backend parameters, only edit them if using this backend
+    "db_name": "ampache",
+    "db_host": "localhost",
+    "db_user": "root",
+    "db_password": "",
+}
diff --git a/helpers/title.py b/helpers/title.py
new file mode 100644
index 0000000..d3ad6c1
--- /dev/null
+++ b/helpers/title.py
@@ -0,0 +1,102 @@
+"""
+Helper to clean Youtube titles, removing usual junk.
+"""
+import re
+
+
+def find_separator(str):
+    """
+    Look for one of the separators commonly put between the artist and the
+    track name in Youtube titles.
+
+    Separators are tried in a fixed order and the first one present in the
+    string wins. Returns a dict with its "index" in the string and its
+    "length", or None if the string is empty or has no known separator.
+    """
+    if len(str) < 1:
+        return None
+
+    candidates = (' -- ', ' - ', ' – ', ' — ', '///', '►')
+    hits = ((str.find(token), len(token)) for token in candidates)
+    for position, width in hits:
+        if position >= 0:
+            return {"index": position, "length": width}
+    return None
+
+
+def split(yt_title):
+    """
+    Split a Youtube title into an artist and a track title.
+
+    Params:
+        - yt_title is the Youtube title to split.
+
+    Returns:
+        A dict with "artist" and "title" keys. Both are None if no separator
+        was found in the title, else they hold the cleaned text found before
+        and after the separator.
+    """
+    found = find_separator(yt_title)
+    if found is None or len(yt_title) == 0:
+        return {"artist": None, "title": None}
+
+    # The separator itself belongs to neither part
+    cut = found["index"]
+    resume = cut + found["length"]
+
+    return {
+        "artist": clean(yt_title[:cut]),
+        "title": clean(yt_title[resume:])
+    }
+
+
+def clean(title):
+    """
+    Remove usual junk from a Youtube title.
+
+    Params:
+        - title is the raw Youtube title (or a part of it).
+
+    Returns:
+        The title without the usual tags ("[HD]", "(official video)",
+        "(1999)"...), trimmed from surrounding blanks and dashes.
+    """
+    title = re.sub(r"^\s+|\s+$", '', title)
+    # **NEW**
+    title = re.sub(r"\s*\*+\s?\S+\s?\*+$", '', title)
+    # [whatever] or (whatever version) at the end
+    title = re.sub(r"\s*\[[^\]]+\]$", '', title)
+    title = re.sub(r"(?i)\s*\([^\)]*version\)$", '', title)
+    # video extensions
+    title = re.sub(r"(?i)\s*\.(avi|wmv|mpg|mpeg|flv)$", '', title)
+    # (LYRIC VIDEO)
+    title = re.sub(r"(?i)\s*(LYRIC VIDEO\s*)?(lyric video\s*)", '', title)
+    # (Official title Stream); NOTE(review): maybe meant "track", confirm
+    title = re.sub(r"(?i)\s*(Official title Stream*)", '', title)
+    # (official)? (music)? video, then (official)? (music)? audio
+    title = re.sub(r"(?i)\s*(of+icial\s*)?(music\s*)?video", '', title)
+    title = re.sub(r"(?i)\s*(of+icial\s*)?(music\s*)?audio", '', title)
+    # (ALBUM title)
+    title = re.sub(r"(?i)\s*(ALBUM title\s*)?(album title\s*)", '', title)
+    # (Cover Art)
+    title = re.sub(r"(?i)\s*(COVER ART\s*)?(Cover Art\s*)", '', title)
+    # (official)
+    title = re.sub(r"(?i)\s*\(\s*of+icial\s*\)", '', title)
+    # (1999)
+    title = re.sub(r"\s*\(\s*[0-9]{4}\s*\)", '', title)
+    # (HD), (HQ), HD or HQ at the end
+    title = re.sub(r"\s+\(\s*(HD|HQ)\s*\)$", '', title)
+    title = re.sub(r"\s+(HD|HQ)\s*$", '', title)
+    # video clip
+    title = re.sub(r"(?i)\s*video\s*clip", '', title)
+    # Full Album
+    title = re.sub(r"(?i)\s*full\s*album", '', title)
+    # live
+    title = re.sub(r"(?i)\s+\(?live\)?$", '', title)
+    # Leftovers after e.g. (official video)
+    title = re.sub(r"\(+\s*\)+", '', title)
+    # Keep only what is quoted: The new "title title" featuring someone
+    title = re.sub(r'^(|.*\s)"(.*)"(\s.*|)$', r'\2', title)
+    title = re.sub(r"^(|.*\s)'(.*)'(\s.*|)$", r'\2', title)
+    # trim white chars and dash (strip returns a new string)
+    return title.strip(" \t\n\r-")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..cff0b16
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+mysqlclient==1.3.7
+youtube-dl==2016.6.3
diff --git a/youtube2local.py b/youtube2local.py
new file mode 100755
index 0000000..bb2d2b1
--- /dev/null
+++ b/youtube2local.py
@@ -0,0 +1,81 @@
+import importlib
+import json
+import sys
+
+import youtube_dl
+
+import config
+from helpers import title
+
+backend = importlib.import_module("backends.%s" % config.config["backend"])
+
+
+class MyLogger:
+    """Custom logger for YoutubeDL: prints "[download]" lines and errors."""
+    def debug(self, msg):
+        # Only the "[download]" lines are shown among debug messages
+        if not msg.startswith("[download]"):
+            return
+        print(msg)
+
+    def warning(self, msg):
+        """Warnings are not reported."""
+
+    def error(self, msg):
+        print(msg)
+
+
+# Options handed to every YoutubeDL instance created by this script
+YDL_OPTS = {
+    # Route YoutubeDL messages through the custom logger
+    "logger": MyLogger(),
+    "ignoreerrors": True,
+    "format": "bestaudio/best",
+    "postprocessors": [
+        {"key": "FFmpegExtractAudio", "preferredcodec": "mp3",
+         "preferredquality": "192"},
+    ],
+}
+
+
+def match(youtube_url):
+    """
+    Match videos from a Youtube URL with a local music collection.
+
+    Params:
+        - youtube_url is the Youtube URL to fetch and match.
+
+    Returns:
+        A list of dict for every Youtube video in the Youtube link (single
+        video or playlist) whose infos could be fetched. Each dict contains
+        the cleaned Youtube title ("yt_title"), the video webpage URL ("url")
+        and an eventual match ("match", None if none found).
+    """
+    # Fetch infos for Youtube link
+    with youtube_dl.YoutubeDL(YDL_OPTS) as ydl:
+        result = ydl.extract_info(youtube_url, download=False)
+    # YDL_OPTS sets "ignoreerrors": a failed fetch gives None, not an error
+    if result is None:
+        return []
+    # Single video
+    entries = result["entries"] if "entries" in result else [result]
+    # Parse every song
+    songs = []
+    for entry in entries:
+        if entry is None:  # Playlist entry which could not be fetched
+            continue
+        yt_title = title.clean(entry["title"])
+        songs.append({
+            "match": backend.check(yt_title, config.config),
+            "yt_title": yt_title,
+            "url": entry["webpage_url"]
+        })
+    return songs
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        sys.exit("Usage: %s YOUTUBE_URL" % sys.argv[0])
+    # Dump the matches as human readable JSON on the standard output
+    songs = match(sys.argv[1])
+    print(json.dumps(songs, sort_keys=True, indent=4, separators=(',', ': ')))