grooveondemand/groove/db/scanner.py

97 lines
3.2 KiB
Python
Raw Normal View History

2022-11-27 18:42:46 -08:00
import asyncio
2022-11-20 16:26:40 -08:00
import logging
import os
2022-11-27 18:42:46 -08:00
import music_tag
2022-11-20 16:26:40 -08:00
from pathlib import Path
from typing import Callable, Union, Iterable
2022-12-10 10:14:06 -08:00
from sqlalchemy import func, delete
2022-11-20 16:26:40 -08:00
import groove.db
import groove.path
2022-11-20 16:26:40 -08:00
class MediaScanner:
    """
    Scan a directory structure containing audio files and import them into the database.

    The scanner walks ``root`` once per glob pattern, reads artist/title tags
    from every regular file it finds and inserts one row per file into the
    track table. ``cleanup()`` performs the reverse operation and removes rows
    whose file no longer exists below ``root``.
    """

    def __init__(self, root: Union[Path, None], db: Callable, glob: Union[str, None] = None) -> None:
        """
        Configure the scanner.

        Args:
            root: Directory to scan. When falsy, ``groove.path.media_root()``
                is used instead.
            db: Database session object. The scanner calls its ``query()``,
                ``execute()`` and ``commit()`` methods.
            glob: Comma-separated list of glob patterns. When falsy, the
                ``MEDIA_GLOB`` environment variable is used.
        """
        self._db = db

        # An explicit argument wins over the environment. Falling back to ''
        # avoids calling .split() on None when neither is configured.
        raw_patterns = glob or os.environ.get('MEDIA_GLOB') or ''

        # Drop empty entries (e.g. from a trailing comma): Path.rglob('')
        # rejects an empty pattern, which would abort the whole scan.
        self._glob = tuple(
            pattern.strip() for pattern in raw_patterns.split(',') if pattern.strip()
        )
        if not self._glob:
            logging.warning("No media glob configured; pass glob= or set MEDIA_GLOB.")

        self._root = root or groove.path.media_root()
        logging.debug(f"Configured media scanner for root: {self._root}")

    @property
    def db(self) -> Callable:
        """The database session object passed to the constructor."""
        return self._db

    @property
    def root(self) -> Path:
        """The directory that is scanned for media files."""
        return self._root

    @property
    def glob(self) -> tuple:
        """The glob patterns to search for, as a tuple of strings."""
        return self._glob

    def find_sources(self, pattern):
        """Return an iterator over every path below ``root`` matching ``pattern``."""
        return self.root.rglob(pattern)  # pragma: no cover

    def import_tracks(self, sources: Iterable) -> None:
        """
        Import every existing, non-directory path in ``sources`` and commit.

        A path that fails to import is logged and skipped so that one
        unreadable file does not abort the rest of the batch.
        """
        async def _do_import():
            logging.debug("Scanning filesystem (this may take a minute)...")
            paths = [path for path in sources if path.exists() and not path.is_dir()]

            # gather() both keeps a reference to every task and waits for all
            # of them; tasks still pending when asyncio.run() returns would be
            # cancelled. return_exceptions=True keeps the import best-effort.
            results = await asyncio.gather(
                *(self._import_one_track(path) for path in paths),
                return_exceptions=True,
            )
            for path, result in zip(paths, results):
                if isinstance(result, BaseException):
                    logging.error(f"Failed to import {path}: {result!r}")

        asyncio.run(_do_import())
        self.db.commit()

    def cleanup(self) -> int:
        """
        Check for the existence of every track in the database.

        Rows whose file is missing below ``root`` are deleted, then the
        session is committed.

        Returns:
            The number of track rows that were deleted.
        """
        async def _del(track) -> bool:
            path = self.root / Path(track.relpath)
            if path.exists():
                return False
            logging.info(f"Deleting missing track {track.relpath}")
            self.db.execute(
                delete(groove.db.track).where(groove.db.track.c.id == track.id)
            )
            return True

        async def _do_cleanup() -> int:
            logging.debug("Locating stale track definitions in the database...")
            tracks = self.db.query(groove.db.track).all()

            # Await every deletion so none is cancelled when the loop shuts
            # down, and so failures surface in the log.
            results = await asyncio.gather(
                *(_del(track) for track in tracks),
                return_exceptions=True,
            )
            deleted = 0
            for track, result in zip(tracks, results):
                if isinstance(result, BaseException):
                    logging.error(f"Failed to clean up track {track.relpath}: {result!r}")
                elif result:
                    deleted += 1
            return deleted

        deleted = asyncio.run(_do_cleanup())
        self.db.commit()
        return deleted

    def _get_tags(self, path):  # pragma: no cover
        """Read the file at ``path`` and return its ``artist`` and ``title`` tags as strings."""
        tags = music_tag.load_file(path)
        return {
            'artist': str(tags.resolve('album_artist')),
            'title': str(tags['title']),
        }

    async def _import_one_track(self, path):
        """Insert a track row for ``path``; the INSERT carries an ``OR IGNORE`` prefix."""
        tags = self._get_tags(path)
        # Store the path relative to the media root.
        tags['relpath'] = str(path.relative_to(self.root))
        # insert().values(...) rather than insert(values): the positional form
        # is deprecated in SQLAlchemy 1.4 and removed in 2.0.
        stmt = groove.db.track.insert().values(tags).prefix_with('OR IGNORE')
        logging.debug(f"{tags['artist']} - {tags['title']}")
        self.db.execute(stmt)

    def scan(self) -> int:
        """
        Walk the media root and insert Track table entries for each media file
        found. Existing entries will be ignored.

        Returns:
            The number of rows added to the track table during this call.
        """
        count = self.db.query(func.count(groove.db.track.c.relpath)).scalar()
        logging.debug(f"Track table currently contains {count} entries.")

        # Initialised up front so that an empty pattern list returns 0 instead
        # of raising UnboundLocalError.
        newcount = 0
        for pattern in self.glob:
            self.import_tracks(self.find_sources(pattern))
            newcount = self.db.query(func.count(groove.db.track.c.relpath)).scalar() - count
            logging.debug(f"Inserted {newcount} new tracks so far this run...")
        return newcount
# Module-level alias: ``media_scanner(...)`` constructs a MediaScanner.
media_scanner = MediaScanner