grooveondemand/groove/db/scanner.py

207 lines
6.3 KiB
Python
Raw Normal View History

2022-11-27 18:42:46 -08:00
import asyncio
2022-11-20 16:26:40 -08:00
import logging
import os
2022-11-27 18:42:46 -08:00
2022-12-21 15:17:13 -08:00
from itertools import chain
2022-11-20 16:26:40 -08:00
from pathlib import Path
from typing import Callable, Union, Iterable
2022-12-21 15:17:13 -08:00
import music_tag
import rich.repr
from rich.console import Console
from rich.progress import (
Progress,
TextColumn,
BarColumn,
SpinnerColumn,
TimeRemainingColumn
)
from sqlalchemy import func
2022-12-21 15:17:13 -08:00
from sqlalchemy.exc import NoResultFound
2022-11-20 16:26:40 -08:00
import groove.db
import groove.path
2022-11-20 16:26:40 -08:00
2022-12-21 15:17:13 -08:00
from groove.exceptions import InvalidPathError
2022-11-20 16:26:40 -08:00
2022-12-21 15:17:13 -08:00
@rich.repr.auto(angular=True)
class MediaScanner:
    """
    SYNOPSIS
        Scan a directory structure containing audio files and import track
        entries into the Groove on Demand database. Existing tracks will be
        ignored.

    USAGE
        MediaScanner(db=DB, [ARGS])

    ARGS
        db          An sqlalchemy database session
        console     A rich console instance. A new Console is created if
                    none is given.
        glob        A pattern to search for. Defaults to the MEDIA_GLOB
                    environment variable, or '*.mp3,*.flac,*.m4a' if unset.
                    Multiple patterns can be specified as a comma-separated
                    list; whitespace around each pattern is ignored.
        path        The directory to scan, relative to the media root.
                    Defaults to the media root itself.

    EXAMPLES
        MediaScanner(db=DB, path='Kid Koala', glob='*.mp3').scan()
        >>> 15

    INSTANCE ATTRIBUTES
        db          The database session
        console     The rich console instance
        glob        The tuple of glob patterns to search for
        path        The path to be scanned
        root        The media root, from groove.path.media_root()
    """

    def __init__(
        self,
        db: Callable,
        path: Union[Path, None] = None,
        glob: Union[str, None] = None,
        console: Union[Console, None] = None,
    ) -> None:
        self._db = db
        self._glob = self._parse_globs(
            glob or os.environ.get('MEDIA_GLOB', '*.mp3,*.flac,*.m4a')
        )
        self._root = groove.path.media_root()
        self._console = console or Console()
        self._scanned = 0
        self._imported = 0
        self._total = 0
        # Must come after _root is set: the scan path is resolved against it.
        self._path = self._configure_path(path)

    @staticmethod
    def _parse_globs(spec: str) -> tuple:
        """
        Split a comma-separated pattern list into a tuple of patterns,
        trimming surrounding whitespace and discarding empty entries (an
        empty pattern is rejected by Path.rglob).
        """
        return tuple(
            pattern.strip() for pattern in spec.split(',') if pattern.strip()
        )

    @property
    def db(self) -> Callable:
        return self._db

    @property
    def console(self) -> Console:
        return self._console

    @property
    def root(self) -> Path:
        return self._root

    @property
    def path(self) -> Path:
        return self._path

    @property
    def glob(self) -> tuple:
        return self._glob

    def _configure_path(self, path):
        """
        Resolve the requested scan path against the media root.

        Returns the media root when no path is given. Raises InvalidPathError
        if the resolved path does not exist or is not a directory.
        """
        if not path:  # pragma: no cover
            return self._root
        fullpath = Path(self._root) / Path(path)
        if not (fullpath.exists() and fullpath.is_dir()):
            raise InvalidPathError(  # pragma: no cover
                f"[b]{fullpath}[/b] does not exist or is not a directory."
            )
        return fullpath

    def _get_tags(self, path):  # pragma: no cover
        """
        Read the audio tags from the file at path and return the artist and
        title as a dict of strings.
        """
        tags = music_tag.load_file(path)
        return {
            'artist': str(tags.resolve('album_artist')),
            'title': str(tags['title']),
        }

    def find_sources(self, pattern):
        """
        Recursively search the instance path for files matching the pattern.
        Directories that happen to match are skipped.
        """
        entrypoint = self._path if self._path else self._root
        for path in entrypoint.rglob(pattern):  # pragma: no cover
            if not path.is_dir():
                yield path

    def import_tracks(self, sources: Iterable) -> None:
        """
        Step through the specified source files and schedule async tasks to
        import them, reporting progress via a rich progress bar.

        A failure to import an individual file is logged and does not abort
        the remainder of the import.
        """
        async def _do_import(progress, scanner):
            scheduled = []
            for path in sources:
                self._total += 1
                progress.update(scanner, total=self._total)
                task = asyncio.create_task(
                    self._import_one_track(path, progress, scanner))
                scheduled.append((path, task))
            progress.start_task(scanner)

            # Wait for every import explicitly rather than relying on the
            # event loop happening to run the tasks before it shuts down.
            # return_exceptions=True keeps one bad file from aborting the
            # rest of the scan.
            results = await asyncio.gather(
                *(task for _, task in scheduled),
                return_exceptions=True,
            )
            for (path, _), result in zip(scheduled, results):
                if isinstance(result, BaseException):
                    logging.error("Failed to import %s: %r", path, result)

        progress = Progress(
            TimeRemainingColumn(compact=True, elapsed_when_finished=True),
            BarColumn(bar_width=15),
            TextColumn("[progress.percentage]{task.percentage:>3.0f}%", justify="left"),
            TextColumn("[dim]|"),
            TextColumn("[title]{task.total:-6d}[/title] [b]total", justify="right"),
            TextColumn("[dim]|"),
            TextColumn("[title]{task.fields[imported]:-6d}[/title] [b]new", justify="right"),
            TextColumn("[dim]|"),
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=self.console,
        )
        with progress:
            # The task is added unstarted; _do_import starts it once all
            # sources have been enumerated and the total is known.
            scanner = progress.add_task(
                f"[bright]Scanning [link]{self.path}[/link] (this may take some time)...",
                imported=0,
                total=0,
                start=False
            )
            asyncio.run(_do_import(progress, scanner))
            progress.update(
                scanner,
                completed=self._total,
                description=f"[bright]Scan of [link]{self.path}[/link] complete!",
            )

    def _track_exists(self, relpath: str) -> bool:
        """
        Return True if exactly one track with the given relative path is
        already in the database, False if there is none.
        """
        try:
            self.db.query(groove.db.track).filter(
                groove.db.track.c.relpath == relpath).one()
        except NoResultFound:
            return False
        return True

    async def _import_one_track(self, path, progress, scanner):
        """
        Import a single audio file into the database, unless it already
        exists. The progress bar is advanced in either case.
        """
        self._scanned += 1
        relpath = str(path.relative_to(self.root))

        if self._track_exists(relpath):
            # Still count the file as processed so the bar keeps moving when
            # re-scanning a library that is mostly imported already.
            progress.update(scanner, completed=self._scanned)
            return

        columns = self._get_tags(path)
        columns['relpath'] = relpath

        logging.debug("Importing: %s", columns)
        # insert().values(...) works on both SQLAlchemy 1.x and 2.x; passing
        # the values positionally to insert() was removed in 2.0.
        self.db.execute(groove.db.track.insert().values(columns))
        self.db.commit()
        self._imported += 1
        progress.update(
            scanner,
            imported=self._imported,
            completed=self._scanned,
            description=f"[bright]Imported [artist]{columns['artist']}[/artist]: [title]{columns['title']}[/title]",
        )

    def scan(self) -> int:
        """
        Walk the scan path and insert Track table entries for each media file
        found. Existing entries will be ignored.

        Returns the number of rows added to the track table by this scan.
        """
        # Start each scan from zero so that re-using an instance does not
        # report cumulative totals from earlier scans.
        self._scanned = 0
        self._imported = 0
        self._total = 0

        count = self.db.query(func.count(groove.db.track.c.relpath)).scalar()
        combined_sources = chain.from_iterable(
            self.find_sources(pattern) for pattern in self.glob
        )
        self.import_tracks(combined_sources)
        newcount = self.db.query(func.count(groove.db.track.c.relpath)).scalar() - count
        return newcount