Initial commit

This commit is contained in:
Lucas Verney 2016-06-09 18:31:39 +02:00
commit ffb9ae3ffa
9 changed files with 274 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
config.py
__pycache__

8
LICENSE Normal file
View File

@ -0,0 +1,8 @@
The MIT License (MIT)
Copyright (c) 2016 Phyks (Lucas Verney)
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

28
README.md Normal file
View File

@ -0,0 +1,28 @@
Youtube2local
=============
This is a script to help you match Youtube music videos and playlists with
your local song collection.
## Usage
* Install the required dependencies: `pip install -r requirements.txt`.
* Copy the `config.py.example` file to `config.py` and edit it according to
your needs.
* Run the script: `python3 -m youtube2local "https://www.youtube.com/watch?v=1mkUp1V3ys0"`.
## Extending backends
For now it only features a backend using the database from an Ampache
installation. You can easily extend backends by adding files in the `backends`
directory, implementing the `check` function as the `ampacheSQL.py` backend
does. Then, just edit your config accordingly.
Feel free to submit a PR for new backends.
## LICENSE
Released under MIT license.

0
__init__.py Normal file
View File

38
backends/ampacheSQL.py Normal file
View File

@ -0,0 +1,38 @@
"""
Check whether a song exists by querying the Ampache SQL database directly.
"""
import MySQLdb
import MySQLdb.cursors
def check(str, config):
    """
    Check if a song is in the Ampache catalog.

    Params:
        - str is a string to match against song names, artists, albums etc.
          (the name shadows the builtin; it is kept so that existing callers
          keep working).
        - config is a configuration dictionary. The keys "db_user",
          "db_host", "db_password" and "db_name" are read from it.

    Returns:
        A dict containing the found infos about the song (keys: id, artist,
        album, title, file, score) or None if no row matched.

    Errors raised by MySQLdb are not caught here and propagate to the caller.
    """
    db = MySQLdb.connect(user=config["db_user"], host=config["db_host"],
                         passwd=config["db_password"],
                         db=config["db_name"],
                         cursorclass=MySQLdb.cursors.DictCursor)
    # Release the cursor and the connection whatever happens: this function
    # is called once per video, so leaking them piles up open connections.
    try:
        c = db.cursor()
        try:
            # The search string is bound twice: once to compute the score,
            # once to filter the rows. Only the best scoring row is kept.
            c.execute("""SELECT
                song.id AS id,
                artist.name AS artist,
                album.name AS album,
                song.title AS title,
                song.file AS file,
                MATCH(artist.name, album.name, song.title)
                    AGAINST(%s IN BOOLEAN MODE) AS score
                FROM song
                LEFT JOIN artist ON song.artist = artist.id
                LEFT JOIN album ON song.album = album.id
                WHERE MATCH(artist.name, album.name, song.title)
                    AGAINST(%s IN BOOLEAN MODE)
                ORDER BY score DESC LIMIT 1""", (str, str,))
            return c.fetchone()
        finally:
            c.close()
    finally:
        db.close()

13
config.py.example Normal file
View File

@ -0,0 +1,13 @@
"""
Example configuration file.
"""
# Settings dictionary read by the script and handed to the selected backend.
config = dict(
    # One of the available backends in backends folder
    backend="ampacheSQL",
    # ampacheSQL parameters, only edit if using it
    db_name="ampache",
    db_host="localhost",
    db_user="root",
    db_password="",
)

102
helpers/title.py Normal file
View File

@ -0,0 +1,102 @@
"""
Helper to clean Youtube titles, removing usual junk.
"""
import re
def find_separator(str):
    """
    Find a common separator used in Youtube titles to separate artist and
    track name.

    Params:
        - str is the Youtube title to search (the name shadows the builtin;
          it is kept so that existing callers keep working).

    Returns:
        A dict with the "index" at which the separator starts and its
        "length", or None if the title is empty or holds no known separator.
    """
    # Listed by priority: the first separator found in the title wins.
    # Empty strings must never be listed here: str.find('') is always 0, so
    # an empty entry matches every title and hides all entries after it.
    # NOTE(review): the empty entries that used to be in this list were
    # presumably non-ASCII dashes lost in an encoding mishap -- confirm and
    # restore them if so.
    separators = (' -- ', ' - ', ' ', '///')

    if not str:
        return None

    for sep in separators:
        index = str.find(sep)
        if index > -1:
            return {
                "index": index,
                "length": len(sep)
            }
    return None
def split(yt_title):
    """
    Cut a Youtube title into an artist part and a title part.

    The cut is made at the separator reported by find_separator, and both
    halves go through clean. When no separator is found, or the title is
    empty, both "artist" and "title" are None in the returned dict.
    """
    found = find_separator(yt_title)

    if found is not None and len(yt_title) != 0:
        cut = found["index"]
        resume = cut + found["length"]
        # Everything before the separator is the artist, the rest the title
        return {
            "artist": clean(yt_title[:cut]),
            "title": clean(yt_title[resume:])
        }

    return {
        "artist": None,
        "title": None
    }
# Junk removed from a title, applied in this order. These are plain Python
# patterns: no surrounding slashes, flags are passed to re.compile.
_JUNK_PATTERNS = tuple(
    re.compile(pattern, flags) for pattern, flags in (
        # **NEW**
        (r"\s*\*+\s?\S+\s?\*+$", 0),
        # [whatever]
        (r"\s*\[[^\]]+\]$", 0),
        # (whatever version)
        (r"\s*\([^\)]*version\)$", re.IGNORECASE),
        # video extensions
        (r"\s*\.(avi|wmv|mpg|mpeg|flv)$", re.IGNORECASE),
        # (LYRIC VIDEO)
        (r"\s*(LYRIC VIDEO\s*)?(lyric video\s*)", re.IGNORECASE),
        # (Official title Stream)
        # NOTE(review): "title" here (and in "album title" below) may be the
        # result of a search-and-replace on "track" -- confirm.
        (r"\s*(Official title Stream*)", re.IGNORECASE),
        # (official)? (music)? video
        (r"\s*(of+icial\s*)?(music\s*)?video", re.IGNORECASE),
        # (official)? (music)? audio
        (r"\s*(of+icial\s*)?(music\s*)?audio", re.IGNORECASE),
        # (ALBUM title)
        (r"\s*(ALBUM title\s*)?(album title\s*)", re.IGNORECASE),
        # (Cover Art)
        (r"\s*(COVER ART\s*)?(Cover Art\s*)", re.IGNORECASE),
        # (official)
        (r"\s*\(\s*of+icial\s*\)", re.IGNORECASE),
        # (1999)
        (r"\s*\(\s*[0-9]{4}\s*\)", re.IGNORECASE),
        # (HD) / (HQ)
        (r"\s+\(\s*(HD|HQ)\s*\)$", 0),
        # HD / HQ
        (r"\s+(HD|HQ)\s*$", 0),
        # video clip
        (r"\s*video\s*clip", re.IGNORECASE),
        # Full Album
        (r"\s*full\s*album", re.IGNORECASE),
        # live
        (r"\s+\(?live\)?$", re.IGNORECASE),
        # Leftovers after e.g. (official video)
        (r"\(+\s*\)+", 0),
    )
)

# Titles wrapped in quotes: only the quoted part (second group) is kept.
_QUOTED_TITLE_PATTERNS = (
    # Artist - The new "title title" featuring someone
    re.compile(r'^(|.*\s)"(.*)"(\s.*|)$'),
    # 'title title'
    re.compile(r"^(|.*\s)'(.*)'(\s.*|)$"),
)


def clean(title):
    """
    Remove usual junk from a Youtube title.

    Params:
        - title is the raw string to clean.

    Returns:
        The title stripped from surrounding whitespace, from the junk
        described by _JUNK_PATTERNS, reduced to its quoted part if it has
        one, and finally trimmed from leading / trailing whitespace and
        dashes.
    """
    title = title.strip()

    for junk in _JUNK_PATTERNS:
        title = junk.sub('', title)

    for quoted in _QUOTED_TITLE_PATTERNS:
        # Raw string: a plain '\2' would be the control character chr(2)
        # and not a reference to the second group.
        title = quoted.sub(r'\2', title)

    # trim white chars and dash (str methods return a new string, the
    # result has to be used)
    return title.strip(" \t\n\r-")

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
mysqlclient==1.3.7
youtube-dl==2016.6.3

81
youtube2local.py Executable file
View File

@ -0,0 +1,81 @@
import importlib
import json
import sys
import youtube_dl
import config
from helpers import title
# Load the backend module named by the "backend" configuration entry from the
# backends package; match() calls its check() function for every video.
backend = importlib.import_module("backends.%s" % config.config["backend"])
class MyLogger():
    """
    Logger object given to YoutubeDL through the 'logger' option.

    Debug messages are printed only when they start with "[download]",
    warnings are dropped and errors are always printed.
    """

    def debug(self, msg):
        """Print msg if it starts with "[download]", ignore it otherwise."""
        if not msg.startswith("[download]"):
            return
        print(msg)

    def warning(self, msg):
        """Silently discard warnings."""
        return None

    def error(self, msg):
        """Print every error message."""
        print(msg)
# Options passed to the YoutubeDL constructor
YDL_OPTS = dict(
    logger=MyLogger(),
    ignoreerrors=True,
    format='bestaudio/best',
    postprocessors=[
        dict(
            key='FFmpegExtractAudio',
            preferredcodec='mp3',
            preferredquality='192',
        ),
    ],
)
def match(youtube_url):
    """
    Match videos from a Youtube URL with a local music collection.

    Params:
        - youtube_url is the Youtube URL to fetch and match.

    Returns:
        A list of dict for every Youtube video in the Youtube link (single
        video or playlist). Each dict contains the cleaned Youtube title
        ("yt_title"), the video webpage URL ("url") and an eventual match
        from the backend ("match", None if none found). Videos whose infos
        could not be fetched are left out; the list is empty if nothing
        could be fetched at all.
    """
    # Fetch infos for Youtube link
    with youtube_dl.YoutubeDL(YDL_OPTS) as ydl:
        result = ydl.extract_info(youtube_url, download=False)

    # YDL_OPTS enables 'ignoreerrors': on failure youtube-dl reports the
    # error through the logger and hands back None instead of raising.
    if result is None:
        return []

    # Single video: handle it as a one-item playlist
    entries = result["entries"] if "entries" in result else [result]

    # Parse every song
    songs = []
    for entry in entries:
        # For the same reason, unavailable playlist items show up as None
        if entry is None:
            continue
        # Clean the entry title
        yt_title = title.clean(entry["title"])
        # Add the song to the list, with its eventual match
        songs.append({
            "match": backend.check(yt_title, config.config),
            "yt_title": yt_title,
            "url": entry["webpage_url"]
        })
    return songs
if __name__ == "__main__":
    # The Youtube URL is expected as first command-line argument
    if not sys.argv[1:]:
        sys.exit("Usage: %s YOUTUBE_URL" % sys.argv[0])

    # Dump the matches as pretty-printed JSON with sorted keys
    matches = match(sys.argv[1])
    report = json.dumps(matches, sort_keys=True, indent=4,
                        separators=(',', ': '))
    print(report)