grooveondemand/groove/db/scanner.py

import asyncio
import logging
import os

from itertools import chain
from pathlib import Path
from typing import Callable, Union, Iterable

import music_tag
import rich.repr

from rich.console import Console
from rich.progress import (
    Progress,
    TextColumn,
    BarColumn,
    SpinnerColumn,
    TimeRemainingColumn
)
from sqlalchemy import func
from sqlalchemy.exc import NoResultFound

import groove.db
import groove.path

from groove.exceptions import InvalidPathError


@rich.repr.auto(angular=True)
class MediaScanner:
    """
    SYNOPSIS

        Scan a directory structure containing audio files and import track entries
        into the Groove on Demand database. Existing tracks will be ignored.

    USAGE

        MediaScanner(db=DB, [ARGS])

    ARGS

        db          An sqlalchemy database session
        console     A rich console instance. Defaults to a new Console.
        glob        A pattern to search for. Defaults to the MEDIA_GLOB
                    environment variable.  Multiple patterns can be specified
                    as a comma-separated list; whitespace around each pattern
                    and empty entries are ignored.
        path        The path to scan, joined onto the media root. Defaults to
                    the media root itself.

    EXAMPLES

        MediaScanner(db=DB, path='Kid Koala', glob='*.mp3').scan()
        >>> 15

    INSTANCE ATTRIBUTES

        db          The database session
        console     The rich console instance
        glob        The globs to search for
        path        The path to be scanned
        root        The media root, as returned by groove.path.media_root()

    """

    # Patterns used when neither the glob argument nor MEDIA_GLOB supplies any.
    _DEFAULT_GLOB = '*.mp3,*.flac,*.m4a'

    def __init__(
        self,
        db: Callable,
        path: Union[Path, None] = None,
        glob: Union[str, None] = None,
        console: Union[Console, None] = None,
    ) -> None:
        self._db = db
        # An empty argument or an empty MEDIA_GLOB falls through to the default
        # rather than producing an empty pattern that rglob() would reject.
        self._glob = self._parse_globs(
            glob or os.environ.get('MEDIA_GLOB') or self._DEFAULT_GLOB
        )
        self._root = groove.path.media_root()
        self._console = console or Console()
        self._scanned = 0
        self._imported = 0
        self._total = 0
        self._path = self._configure_path(path)

    @staticmethod
    def _parse_globs(spec: str) -> tuple:
        """
        Split a comma-separated list of glob patterns into a tuple, trimming
        surrounding whitespace and discarding empty entries.
        """
        return tuple(
            pattern.strip() for pattern in spec.split(',') if pattern.strip()
        )

    @property
    def db(self) -> Callable:
        return self._db

    @property
    def console(self) -> Console:
        return self._console

    @property
    def root(self) -> Path:
        return self._root

    @property
    def path(self) -> Path:
        return self._path

    @property
    def glob(self) -> tuple:
        return self._glob

    def _configure_path(self, path: Union[Path, str, None]) -> Path:
        """
        Resolve the requested scan path against the media root.

        Returns the media root itself when no path is given. Raises
        InvalidPathError if the resulting path is not an existing directory.
        """
        if not path:  # pragma: no cover
            return self._root
        fullpath = Path(self._root) / Path(path)
        if not (fullpath.exists() and fullpath.is_dir()):
            raise InvalidPathError(  # pragma: no cover
                f"[b]{fullpath}[/b] does not exist or is not a directory."
            )
        return fullpath

    def _get_tags(self, path) -> dict:  # pragma: no cover
        """
        Read the audio tags from the file at path and return a dict with the
        'artist' and 'title' values as strings.
        """
        tags = music_tag.load_file(path)
        return {
            'artist': str(tags.resolve('album_artist')),
            'title': str(tags['title']),
        }

    def find_sources(self, pattern: str) -> Iterable:
        """
        Recursively search the instance path for files matching the pattern.
        Directories whose names match the pattern are skipped.
        """
        entrypoint = self._path if self._path else self._root
        for path in entrypoint.rglob(pattern):  # pragma: no cover
            if not path.is_dir():
                yield path

    def import_tracks(self, sources: Iterable) -> None:
        """
        Step through the specified source files and schedule async tasks to
        import them, reporting progress via a rich progress bar.

        Every scheduled task is awaited before this method returns. A failure
        while importing one file is logged and does not stop the remaining
        files from being processed.
        """
        # Start from zero so a second scan on the same instance does not
        # report totals accumulated from the previous one.
        self._scanned = 0
        self._imported = 0
        self._total = 0

        async def _do_import(progress, scanner):
            paths = []
            tasks = []
            for path in sources:
                self._total += 1
                progress.update(scanner, total=self._total)
                paths.append(path)
                tasks.append(asyncio.create_task(
                    self._import_one_track(path, progress, scanner)))
            progress.start_task(scanner)
            # Explicitly wait for the imports instead of relying on the event
            # loop draining its queue before asyncio.run() shuts down, and
            # collect exceptions so they are reported rather than lost.
            results = await asyncio.gather(*tasks, return_exceptions=True)
            for path, result in zip(paths, results):
                if isinstance(result, BaseException):
                    logging.error("Could not import %s: %r", path, result)

        progress = Progress(
            TimeRemainingColumn(compact=True, elapsed_when_finished=True),
            BarColumn(bar_width=15),
            TextColumn("[progress.percentage]{task.percentage:>3.0f}%", justify="left"),
            TextColumn("[dim]|"),
            TextColumn("[title]{task.total:-6d}[/title] [b]total", justify="right"),
            TextColumn("[dim]|"),
            TextColumn("[title]{task.fields[imported]:-6d}[/title] [b]new", justify="right"),
            TextColumn("[dim]|"),
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=self.console,
        )
        with progress:
            scanner = progress.add_task(
                f"[bright]Scanning [link]{self.path}[/link] (this may take some time)...",
                imported=0,
                total=0,
                start=False
            )
            asyncio.run(_do_import(progress, scanner))
            progress.update(
                scanner,
                completed=self._total,
                description=f"[bright]Scan of [link]{self.path}[/link] complete!",
            )

    async def _import_one_track(self, path, progress, scanner) -> None:
        """
        Import a single audio file into the database, unless it already exists.

        The track is identified by its path relative to the media root. The
        progress bar is advanced whether the track is imported or skipped.
        """
        self._scanned += 1
        relpath = str(path.relative_to(self.root))
        try:
            self.db.query(groove.db.track).filter(
                groove.db.track.c.relpath == relpath).one()
        except NoResultFound:
            pass
        else:
            # Already in the database: count it as scanned so the bar keeps
            # moving when rescanning a library that is mostly imported.
            progress.update(scanner, completed=self._scanned)
            return

        columns = self._get_tags(path)
        columns['relpath'] = relpath

        logging.debug("Importing: %s", columns)
        # insert().values() is used because passing the values positionally to
        # insert() is deprecated in SQLAlchemy 1.4 and removed in 2.0.
        self.db.execute(groove.db.track.insert().values(columns))
        self.db.commit()
        self._imported += 1
        progress.update(
            scanner,
            imported=self._imported,
            completed=self._scanned,
            description=f"[bright]Imported [artist]{columns['artist']}[/artist]: [title]{columns['title']}[/title]",
        )

    def scan(self) -> int:
        """
        Walk the scan path and insert Track table entries for each media file
        found. Existing entries will be ignored.

        Returns the difference in the number of rows in the track table
        between the start and the end of the scan.
        """
        count = self.db.query(func.count(groove.db.track.c.relpath)).scalar()
        combined_sources = chain.from_iterable(
            self.find_sources(pattern) for pattern in self.glob
        )
        self.import_tracks(combined_sources)
        newcount = self.db.query(func.count(groove.db.track.c.relpath)).scalar() - count
        return newcount
WIP addition of interactive shell 2022-11-27 18:42:46 -08:00			`import asyncio`
Add filesystem scanner 2022-11-20 16:26:40 -08:00			`import logging`
			`import os`
WIP addition of interactive shell 2022-11-27 18:42:46 -08:00
refactor scanner, add progress bar 2022-12-21 15:17:13 -08:00			`from itertools import chain`
Add filesystem scanner 2022-11-20 16:26:40 -08:00			`from pathlib import Path`
			`from typing import Callable, Union, Iterable`
refactor scanner, add progress bar 2022-12-21 15:17:13 -08:00
			`import music_tag`
			`import rich.repr`

			`from rich.console import Console`
			`from rich.progress import (`
			`Progress,`
			`TextColumn,`
			`BarColumn,`
			`SpinnerColumn,`
			`TimeRemainingColumn`
			`)`
Revert "adding cleanup of stale track entries" This reverts commit 1a2506742f706b324e48ea03488372e82390dfd9. 2022-12-10 10:20:30 -08:00			`from sqlalchemy import func`
refactor scanner, add progress bar 2022-12-21 15:17:13 -08:00			`from sqlalchemy.exc import NoResultFound`
Add filesystem scanner 2022-11-20 16:26:40 -08:00
			`import groove.db`
implementing themes and refactoring path operations 2022-12-04 12:09:27 -08:00			`import groove.path`
Add filesystem scanner 2022-11-20 16:26:40 -08:00
refactor scanner, add progress bar 2022-12-21 15:17:13 -08:00			`from groove.exceptions import InvalidPathError`

Add filesystem scanner 2022-11-20 16:26:40 -08:00
refactor scanner, add progress bar 2022-12-21 15:17:13 -08:00			`@rich.repr.auto(angular=True)`
Add filesystem scanner 2022-11-20 16:26:40 -08:00			`class MediaScanner:`
			`"""`
refactor scanner, add progress bar 2022-12-21 15:17:13 -08:00			`SYNOPSIS`

			`Scan a directory structure containing audio files and import track entries`
			`into the Groove on Demand database. Existing tracks will be ignored.`

			`USAGE`

			`MediaScanner(db=DB, [ARGS])`

			`ARGS`

			`db An sqlalchemy databse session`
			`console A rich console instance`
			`glob A pattern to search for. Defaults to MEDIA_GLOB. Multiple`
			`patterns can be specifed as a comma-separated-list.`
			`path The path to scan. Defaults to MEDIA_ROOT.`
			`root The media root, as specified by MEDIA_ROOT`

			`EXAMPLES`

			`MediaScanner(db=DB, path='Kid Koala', glob='*.mp3').scan()`
			`>>> 15`

			`INSTANCE ATTRIBUTES`

			`db The databse session`
			`console The rich console instance`
			`glob The globs to search for`
			`path The path to be scanned`
			`root The media root`

Add filesystem scanner 2022-11-20 16:26:40 -08:00			`"""`
refactor scanner, add progress bar 2022-12-21 15:17:13 -08:00			`def __init__(`
			`self,`
			`db: Callable,`
			`path: Union[Path, None] = None,`
			`glob: Union[str, None] = None,`
			`console: Union[Console, None] = None,`
			`) -> None:`
Add filesystem scanner 2022-11-20 16:26:40 -08:00			`self._db = db`
improved env loading 2022-12-21 21:16:06 -08:00			`self._glob = tuple((glob or os.environ.get('MEDIA_GLOB', '.mp3,.flac,*.m4a')).split(','))`
refactor scanner, add progress bar 2022-12-21 15:17:13 -08:00			`self._root = groove.path.media_root()`
			`self._console = console or Console()`
			`self._scanned = 0`
			`self._imported = 0`
			`self._total = 0`
			`self._path = self._configure_path(path)`
Add filesystem scanner 2022-11-20 16:26:40 -08:00
			`@property`
			`def db(self) -> Callable:`
			`return self._db`

refactor scanner, add progress bar 2022-12-21 15:17:13 -08:00			`@property`
			`def console(self) -> Console:`
			`return self._console`

Add filesystem scanner 2022-11-20 16:26:40 -08:00			`@property`
			`def root(self) -> Path:`
			`return self._root`

refactor scanner, add progress bar 2022-12-21 15:17:13 -08:00			`@property`
			`def path(self) -> Path:`
			`return self._path`

Add filesystem scanner 2022-11-20 16:26:40 -08:00			`@property`
			`def glob(self) -> tuple:`
			`return self._glob`

refactor scanner, add progress bar 2022-12-21 15:17:13 -08:00			`def _configure_path(self, path):`
			`if not path: # pragma: no cover`
			`return self._root`
			`fullpath = Path(self._root) / Path(path)`
			`if not (fullpath.exists() and fullpath.is_dir()):`
			`raise InvalidPathError( # pragma: no cover`
			`f"[b]{fullpath}[/b] does not exist or is not a directory."`
			`)`
			`return fullpath`
Add filesystem scanner 2022-11-20 16:26:40 -08:00
fix scanner tests 2022-11-30 23:42:06 -08:00			`def _get_tags(self, path): # pragma: no cover`
WIP addition of interactive shell 2022-11-27 18:42:46 -08:00			`tags = music_tag.load_file(path)`
fix scanner tests 2022-11-30 23:42:06 -08:00			`return {`
WIP addition of interactive shell 2022-11-27 18:42:46 -08:00			`'artist': str(tags.resolve('album_artist')),`
			`'title': str(tags['title']),`
fix scanner tests 2022-11-30 23:42:06 -08:00			`}`

refactor scanner, add progress bar 2022-12-21 15:17:13 -08:00			`def find_sources(self, pattern):`
			`"""`
			`Recursively search the instance path for files matching the pattern.`
			`"""`
			`entrypoint = self._path if self._path else self._root`
			`for path in entrypoint.rglob(pattern): # pragma: no cover`
			`if not path.is_dir():`
			`yield path`

			`def import_tracks(self, sources: Iterable) -> None:`
			`"""`
			`Step through the specified source files and schedule async tasks to`
			`import them, reporting progress via a rich progress bar.`
			`"""`

			`async def _do_import(progress, scanner):`
			`tasks = set()`
			`for path in sources:`
			`self._total += 1`
			`progress.update(scanner, total=self._total)`
			`tasks.add(asyncio.create_task(`
			`self._import_one_track(path, progress, scanner)))`
			`progress.start_task(scanner)`

			`progress = Progress(`
			`TimeRemainingColumn(compact=True, elapsed_when_finished=True),`
			`BarColumn(bar_width=15),`
			`TextColumn("[progress.percentage]{task.percentage:>3.0f}%", justify="left"),`
			`TextColumn("[dim]\|"),`
			`TextColumn("[title]{task.total:-6d}[/title] [b]total", justify="right"),`
			`TextColumn("[dim]\|"),`
			`TextColumn("[title]{task.fields[imported]:-6d}[/title] [b]new", justify="right"),`
			`TextColumn("[dim]\|"),`
			`SpinnerColumn(),`
			`TextColumn("[progress.description]{task.description}"),`
			`console=self.console,`
			`)`
			`with progress:`
			`scanner = progress.add_task(`
			`f"[bright]Scanning [link]{self.path}[/link] (this may take some time)...",`
			`imported=0,`
			`total=0,`
			`start=False`
			`)`
			`asyncio.run(_do_import(progress, scanner))`
			`progress.update(`
			`scanner,`
			`completed=self._total,`
			`description=f"[bright]Scan of [link]{self.path}[/link] complete!",`
			`)`

			`async def _import_one_track(self, path, progress, scanner):`
			`"""`
			`Import a single audo file into the databse, unless it already exists.`
			`"""`
			`self._scanned += 1`
			`relpath = str(path.relative_to(self.root))`
			`try:`
			`self.db.query(groove.db.track).filter(`
			`groove.db.track.c.relpath == relpath).one()`
			`return`
			`except NoResultFound:`
			`pass`

			`columns = self._get_tags(path)`
			`columns['relpath'] = relpath`

			`logging.debug(f"Importing: {columns}")`
			`self.db.execute(groove.db.track.insert(columns))`
			`self.db.commit()`
			`self._imported += 1`
			`progress.update(`
			`scanner,`
			`imported=self._imported,`
			`completed=self._scanned,`
			`description=f"[bright]Imported [artist]{columns['artist']}[/artist]: [title]{columns['title']}[/title]",`
			`)`
WIP addition of interactive shell 2022-11-27 18:42:46 -08:00
Add filesystem scanner 2022-11-20 16:26:40 -08:00			`def scan(self) -> int:`
			`"""`
			`Walk the media root and insert Track table entries for each media file`
			`found. Existing entries will be ignored.`
			`"""`
			`count = self.db.query(func.count(groove.db.track.c.relpath)).scalar()`
refactor scanner, add progress bar 2022-12-21 15:17:13 -08:00			`combined_sources = chain.from_iterable(`
			`self.find_sources(pattern) for pattern in self.glob`
			`)`
			`self.import_tracks(combined_sources)`
			`newcount = self.db.query(func.count(groove.db.track.c.relpath)).scalar() - count`
Add filesystem scanner 2022-11-20 16:26:40 -08:00			`return newcount`