Add media management scripts

This commit is contained in:
Tony Blyler 2021-07-09 13:04:58 -04:00
parent c591770969
commit f80411393c
2 changed files with 351 additions and 0 deletions

227
media-duplicates Executable file
View file

@ -0,0 +1,227 @@
#!/usr/bin/python3
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Tuple
import argparse
import json
import os
import random
import requests
import shlex
import sqlite3
import subprocess
import sys
def ffprobe_file_path(file_path: str):
    """Run ``ffprobe`` against *file_path* and return its parsed JSON report.

    ffprobe is invoked quietly and asked to print the container format and
    every stream as JSON; both stdout and stderr are captured.

    Raises:
        subprocess.CalledProcessError: if ffprobe exits with a non-zero status.
    """
    command = [
        "ffprobe",
        "-v", "quiet",
        "-print_format", "json",
        "-show_format",
        "-show_streams",
        file_path,
    ]
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
    )
    return json.loads(completed.stdout)
def get_first_video_stream(ffprobe_info):
    """Return the first entry of ``ffprobe_info["streams"]`` whose codec type is video.

    Raises:
        Exception: if none of the streams has ``codec_type == "video"``.
    """
    video_streams = (
        candidate
        for candidate in ffprobe_info["streams"]
        if candidate["codec_type"] == "video"
    )
    first_video = next(video_streams, None)
    if first_video is None:
        raise Exception("there is no video stream")
    return first_video
def codec_name_is_h265(codec_name: str) -> bool:
    """Return True when *codec_name* names an H.265 / HEVC codec.

    The match is a case-insensitive substring test for "265" or "hevc".
    Substring membership is used rather than ``str.find`` because ``find``
    returns -1 (truthy) when nothing matches and 0 (falsy) when the match
    is at the very start of the string.
    """
    normalized = codec_name.lower()
    return "265" in normalized or "hevc" in normalized
def compare_ffprobe_output(first, second) -> int:
    """Rank two parsed ffprobe reports against each other.

    Returns:
        less than 0 if *first* is worse than *second*,
        greater than 0 if *first* is better than *second*,
        0 if they are roughly the same.

    The tie-breakers are applied in order:
      1. the larger video frame area (width * height) wins;
      2. at equal area, an H.265 stream beats a non-H.265 stream;
      3. otherwise the higher container bit rate wins.

    Raises:
        Exception: if either report has no video stream.
        KeyError / ValueError: if a report lacks a usable ``width``,
            ``height``, ``codec_name`` or ``bit_rate`` value.
    """
    first_stream = get_first_video_stream(first)
    second_stream = get_first_video_stream(second)

    # whichever has the most surface area, aka, highest resolution wins by default
    first_area = first_stream["width"] * first_stream["height"]
    second_area = second_stream["width"] * second_stream["height"]
    if first_area != second_area:
        return 1 if first_area > second_area else -1

    # if resolutions are the same, prefer h265
    # bool() keeps the comparison meaningful even if the helper hands back a
    # non-boolean truthy/falsy value
    first_is_h265 = bool(codec_name_is_h265(first_stream["codec_name"]))
    second_is_h265 = bool(codec_name_is_h265(second_stream["codec_name"]))
    if first_is_h265 != second_is_h265:
        return 1 if first_is_h265 else -1

    # if both are h265 or both are not h265, use bitrate
    # ffprobe's JSON writer emits bit_rate as a string of digits, so it has to
    # be converted before comparing; comparing the raw strings would be
    # lexicographic ("999" > "10000")
    first_bitrate = int(first["format"]["bit_rate"])
    second_bitrate = int(second["format"]["bit_rate"])
    if first_bitrate != second_bitrate:
        return 1 if first_bitrate > second_bitrate else -1

    # at this point, I consider these files to be the same
    return 0
def compare_radarr(radarr_db_path1: str, radarr_db_path2: str) -> List[Tuple[str, str]]:
    """Find movies that have a file in both Radarr databases.

    Both databases are attached read-only to a throwaway in-memory
    connection and joined on ``Movies.ImdbId``.

    Args:
        radarr_db_path1: filesystem path of the first Radarr SQLite database.
        radarr_db_path2: filesystem path of the second Radarr SQLite database.

    Returns:
        One ``(path_in_db1, path_in_db2)`` tuple per matching movie, where
        each path is the movie's ``Path`` joined to its file's
        ``RelativePath`` with a ``/``.
    """
    from contextlib import closing
    from urllib.parse import quote

    def read_only_uri(db_path: str) -> str:
        # percent-encode the path so characters such as '#', '?' and '%' are
        # not mistaken for URI syntax by SQLite
        return f"file:{quote(db_path)}?mode=ro"

    # closing() is needed because a sqlite3 connection used as a context
    # manager only ends the transaction; it does not close the connection
    with closing(sqlite3.connect(":memory:", uri=True)) as db:
        db.execute("ATTACH DATABASE ? AS ?", (read_only_uri(radarr_db_path1), "db1"))
        db.execute("ATTACH DATABASE ? AS ?", (read_only_uri(radarr_db_path2), "db2"))

        # empty ImdbId strings are excluded so that two unrelated movies that
        # both lack an IMDb id are never paired with each other
        rows = db.execute("""
            SELECT m1.Path || '/' || mf1.RelativePath, m2.Path || '/' || mf2.RelativePath
            FROM db1.Movies AS m1
            JOIN db2.Movies AS m2 ON m1.ImdbId = m2.ImdbId AND m1.ImdbId <> ''
            JOIN db1.MovieFiles AS mf1 ON m1.MovieFileId = mf1.Id
            JOIN db2.MovieFiles AS mf2 ON m2.MovieFileId = mf2.Id
        """).fetchall()

    return [(path1, path2) for path1, path2 in rows]
def compare_sonarr(sonarr_db_path1: str, sonarr_db_path2: str) -> List[Tuple[str, str]]:
    """Find episodes that have a file in both Sonarr databases.

    Both databases are attached read-only to a throwaway in-memory
    connection. Series are matched on ``Series.ImdbId`` and episodes on
    their season and episode numbers.

    Args:
        sonarr_db_path1: filesystem path of the first Sonarr SQLite database.
        sonarr_db_path2: filesystem path of the second Sonarr SQLite database.

    Returns:
        Unique ``(path_in_db1, path_in_db2)`` tuples, where each path is the
        series' ``Path`` joined to the episode file's ``RelativePath`` with a
        ``/``.
    """
    from contextlib import closing
    from urllib.parse import quote

    def read_only_uri(db_path: str) -> str:
        # percent-encode the path so characters such as '#', '?' and '%' are
        # not mistaken for URI syntax by SQLite
        return f"file:{quote(db_path)}?mode=ro"

    # closing() is needed because a sqlite3 connection used as a context
    # manager only ends the transaction; it does not close the connection
    with closing(sqlite3.connect(":memory:", uri=True)) as db:
        db.execute("ATTACH DATABASE ? AS ?", (read_only_uri(sonarr_db_path1), "db1"))
        db.execute("ATTACH DATABASE ? AS ?", (read_only_uri(sonarr_db_path2), "db2"))

        # empty ImdbId strings are excluded so that two unrelated series that
        # both lack an IMDb id are never paired with each other
        rows = db.execute("""
            SELECT s1.Path || '/' || ef1.RelativePath, s2.Path || '/' || ef2.RelativePath
            FROM db1.Series AS s1
            JOIN db2.Series AS s2 ON s1.ImdbId = s2.ImdbId AND s1.ImdbId <> ''
            JOIN db1.Episodes AS e1 ON e1.SeriesId = s1.Id
            JOIN db2.Episodes AS e2 ON e2.SeriesId = s2.Id AND e2.SeasonNumber = e1.SeasonNumber AND e2.EpisodeNumber = e1.EpisodeNumber
            JOIN db1.EpisodeFiles AS ef1 ON e1.EpisodeFileId = ef1.Id
            JOIN db2.EpisodeFiles AS ef2 ON e2.EpisodeFileId = ef2.Id
        """).fetchall()

    # several Episodes rows can reference the same EpisodeFiles row, which
    # makes the join repeat the same file pair; keep only the first of each
    return list(dict.fromkeys((path1, path2) for path1, path2 in rows))
def rewrite_path(path: str, rewrites: Dict[str, str]) -> str:
    """Swap the leading prefix of *path* according to *rewrites*.

    *rewrites* maps an original prefix to its replacement. The first prefix
    (in the mapping's iteration order) that *path* starts with is replaced;
    if none matches, or *rewrites* is empty/None, *path* is returned as is.
    """
    if not rewrites:
        return path

    for prefix, replacement in rewrites.items():
        if path.startswith(prefix):
            # the prefix sits at index 0, so slicing it off is the same as
            # replacing its first occurrence
            return replacement + path[len(prefix):]

    return path
def rejigger_rewrite_paths(rewrite_paths: List[str]) -> Dict[str, str]:
    """Turn ``"original:replacement"`` strings into an ``{original: replacement}`` dict.

    Each entry is split on its first ``:`` only, so the replacement part may
    itself contain colons. A later entry with the same original overwrites an
    earlier one.

    Raises:
        ValueError: if an entry contains no ``:``.
    """
    split_specs = (spec.split(":", 1) for spec in rewrite_paths)
    return {original: replacement for original, replacement in split_specs}
def compare_media_file_paths(path1: str, path2: str):
    """Compare the media files at *path1* and *path2* via ffprobe.

    Returns:
        None when both paths already refer to the same inode on the same
        device, and also None when the inode check or the ffprobe comparison
        fails (the failure is reported on stderr). Otherwise the integer
        produced by ``compare_ffprobe_output`` for the two files.

    NOTE: callers cannot tell "already hard linked" apart from "failed" by
    the return value alone, since both are reported as None.
    """
    try:
        # samestat() is true exactly when st_ino and st_dev both match
        same_inode = os.path.samestat(os.lstat(path1), os.lstat(path2))
    except Exception as e:
        print(f"failed to check inodes for {path1} and {path2}: {e}", file=sys.stderr)
        return None

    if same_inode:
        return None

    try:
        first_report = ffprobe_file_path(path1)
        second_report = ffprobe_file_path(path2)
        return compare_ffprobe_output(first_report, second_report)
    except Exception as e:
        print(f"failed to compare ffprobe output for {path1} and {path2}: {e}", file=sys.stderr)
        return None
if __name__ == '__main__':
    # Entry point: find media files present in two sonarr/radarr libraries,
    # decide which copy is better, and print the shell commands that would
    # replace the worse copy with a hard link to the better one. Nothing is
    # deleted or linked by this script itself.
    parser = argparse.ArgumentParser(
        description="radarr & sonarr duplicate checker across two database files",
    )
    parser.add_argument('--sonarr-db-paths', type=argparse.FileType('r'), nargs=2, metavar=("/home/sonarr/sonarr.db", "/home/othersonarr/sonarr.db"), required=True)
    parser.add_argument('--radarr-db-paths', type=argparse.FileType('r'), nargs=2, metavar=("/home/radarr/radarr.db", "/home/otherradarr/radarr.db"), required=True)
    parser.add_argument('--path-prefix-rewrite', type=str, nargs='+', metavar=("/media:/mnt/media", "/data:/real/path/data"), help="rewrite the paths for internal database media paths to the 'real' destinations")
    parser.add_argument('--command-output-file', type=argparse.FileType('w'), nargs='?', default=sys.stdout)
    parser.add_argument('--slack-webhook-url', type=str, nargs='?', default=None)
    args = parser.parse_args()

    # argparse opened the database files; only their names are needed from
    # here on, so release the handles right away
    for file_descriptor in args.sonarr_db_paths + args.radarr_db_paths:
        file_descriptor.close()

    duplicate_paths = compare_sonarr(args.sonarr_db_paths[0].name, args.sonarr_db_paths[1].name) + compare_radarr(args.radarr_db_paths[0].name, args.radarr_db_paths[1].name)
    if args.path_prefix_rewrite:
        rewrite_paths = rejigger_rewrite_paths(args.path_prefix_rewrite)
        duplicate_paths = [(rewrite_path(path1, rewrite_paths), rewrite_path(path2, rewrite_paths)) for path1, path2 in duplicate_paths]

    already_saved_space_in_bytes = 0
    space_savings_in_bytes = 0
    files_to_link = 0
    files_already_linked = 0

    # create a thread pool with NumCPUs * 2 workers
    # this is really just used for concurrent `ffprobe` calls
    if hasattr(os, "sched_getaffinity"):
        usable_cpus = len(os.sched_getaffinity(0))
    else:
        # sched_getaffinity is not available on every platform
        usable_cpus = os.cpu_count() or 1

    with ThreadPoolExecutor(max_workers=usable_cpus*2) as executor:
        def map_helper(path_pair):
            return compare_media_file_paths(path_pair[0], path_pair[1])

        for (path1, path2), probe_result in zip(duplicate_paths, executor.map(map_helper, duplicate_paths)):
            if probe_result is None:
                # None is returned both for "already hard linked" and for
                # "could not be compared" (which compare_media_file_paths has
                # already reported on stderr). Re-check the inodes so that
                # failures are neither counted as linked nor allowed to crash
                # the run when a file is missing.
                try:
                    stat1 = os.lstat(path1)
                    stat2 = os.lstat(path2)
                except OSError:
                    continue
                if os.path.samestat(stat1, stat2):
                    files_already_linked += 1
                    already_saved_space_in_bytes += stat1.st_size
                continue

            if probe_result == 0:
                # if the results seem equal, let us flip a figurative coin for which path to overwrite
                print("# this is the result of a coin flip", file=args.command_output_file)
                path1, path2 = random.sample([path1, path2], k=2)
            elif probe_result < 0:
                # path2 is better than path1
                path1, path2 = path2, path1

            # from here on path1 is the copy to keep and path2 the one to replace
            file_size_in_bytes = os.lstat(path2).st_size
            space_savings_in_bytes += file_size_in_bytes
            files_to_link += 1
            print(f"# space savings of {file_size_in_bytes/(1024**3):.2f} GiB", file=args.command_output_file)
            print(f"rm -f {shlex.quote(path2)}", file=args.command_output_file)
            # replace path2's file name with path1's file name
            path2 = os.path.join(os.path.dirname(path2), os.path.basename(path1))
            print(f"ln -f {shlex.quote(path1)} {shlex.quote(path2)}", file=args.command_output_file)

    if args.slack_webhook_url:
        message = "nothing to link for file deduplication"
        if files_to_link > 0:
            message = f"HOLY SMOKES! We can save {space_savings_in_bytes/(1024**3):.2f} GiB by hard linking {files_to_link} files"
        message += f"\nWe already save {already_saved_space_in_bytes/(1024**3):.2f} GiB by hard linking {files_already_linked} files"

        # requests waits forever unless a timeout is given
        requests.post(
            args.slack_webhook_url,
            headers={
                'Content-type': 'application/json',
            },
            data=json.dumps({
                "username": "Apollo Media Duplicates",
                "icon_emoji": ":floppy_disk:",
                "text": message,
            }),
            timeout=10,
        ).raise_for_status()

124
refresh-libraries Executable file
View file

@ -0,0 +1,124 @@
#!/usr/bin/python3
from typing import List
from urllib.parse import urljoin
import argparse
import json
import os
import requests
import sys
import xml.etree.ElementTree as xml
def refresh_radarr_library(base_url: str, api_key: str, timeout: float = 10.0) -> None:
    """Ask Radarr to run its ``RefreshMovie`` command.

    Args:
        base_url: root URL of the Radarr instance; a trailing slash is tolerated.
        api_key: Radarr API key, sent as the ``apiKey`` query parameter.
        timeout: seconds to wait for Radarr before giving up. Without one,
            requests would wait indefinitely on an unresponsive server.

    Raises:
        requests.HTTPError: if Radarr answers with an error status.
        requests.Timeout: if Radarr does not answer within *timeout* seconds.
    """
    # normalise to exactly one trailing slash so urljoin appends to the base
    # path instead of replacing its last segment or doubling the slash
    normalized_base_url = base_url.rstrip("/") + "/"
    requests.post(
        urljoin(normalized_base_url, "api/v3/command"),
        params={
            "apiKey": api_key,
        },
        headers={
            "Content-Type": "application/json",
        },
        data=json.dumps({
            "name": "RefreshMovie",
        }),
        timeout=timeout,
    ).raise_for_status()
def refresh_sonarr_library(base_url: str, api_key: str, timeout: float = 10.0) -> None:
    """Ask Sonarr to run its ``RefreshSeries`` command.

    Args:
        base_url: root URL of the Sonarr instance; a trailing slash is tolerated.
        api_key: Sonarr API key, sent as the ``apikey`` query parameter.
        timeout: seconds to wait for Sonarr before giving up. Without one,
            requests would wait indefinitely on an unresponsive server.

    Raises:
        requests.HTTPError: if Sonarr answers with an error status.
        requests.Timeout: if Sonarr does not answer within *timeout* seconds.
    """
    # normalise to exactly one trailing slash so urljoin appends to the base
    # path instead of replacing its last segment or doubling the slash
    normalized_base_url = base_url.rstrip("/") + "/"
    requests.post(
        urljoin(normalized_base_url, "api/command"),
        params={
            "apikey": api_key,
        },
        headers={
            "Content-Type": "application/json",
        },
        data=json.dumps({
            "name": "RefreshSeries",
        }),
        timeout=timeout,
    ).raise_for_status()
class PlexClient:
    """Small HTTP client for listing and refreshing Plex library sections."""

    def __init__(self, plex_base_url: str, plex_token: str):
        """Remember the server's base URL and the token sent with every request."""
        self.__plex_base_url = plex_base_url
        self.__plex_token = plex_token

    def __create_plex_request(self, relative_path: str) -> str:
        """GET *relative_path* below the base URL and return the response body.

        The request carries the token in the ``X-Plex-Token`` header and uses
        a 3 second timeout. An error status raises via ``raise_for_status``.
        """
        target_url = urljoin(f"{self.__plex_base_url}/", relative_path)
        auth_headers = {
            "X-Plex-Token": self.__plex_token,
        }
        response = requests.get(target_url, timeout=3, headers=auth_headers)
        response.raise_for_status()
        return response.text

    def get_plex_library_sections(self) -> List[str]:
        """Return the ``key`` attribute of each child element of ``library/sections``."""
        document = xml.fromstring(self.__create_plex_request("library/sections"))
        section_keys = []
        for section in document:
            section_keys.append(section.attrib["key"])
        return section_keys

    def refresh_plex_library_section(self, section_id: str) -> None:
        """Request ``library/sections/<section_id>/refresh``; the body is discarded."""
        refresh_path = f"library/sections/{section_id}/refresh"
        self.__create_plex_request(refresh_path)

    def refresh_all_plex_libraries(self) -> None:
        """Refresh every section reported by ``get_plex_library_sections``, in order."""
        section_ids = self.get_plex_library_sections()
        for section_id in section_ids:
            self.refresh_plex_library_section(section_id)
if __name__ == '__main__':
    # Entry point: refresh whichever of Plex, sonarr and radarr has been
    # configured through command line flags or environment variables.
    parser = argparse.ArgumentParser(
        description="Radarr, Sonarr, & Plex library refresher",
        epilog="All arguments will also fallback to environment variable settings of their name in capitalized snake case if not set. For example: if --plex-token isn't set, it will try to read the PLEX_TOKEN environment variable value",
    )

    # (flag, example value shown in the help output)
    option_specs = (
        ("--plex-base-url", "http://example.com:32400"),
        ("--plex-token", "Z99EuQ9Xy9G9z9PQFl99"),
        ("--sonarr-base-url", "http://example.com/sonarr"),
        ("--sonarr-api-key", "6ce909a20a634f7cbe245e1893865ee5"),
        ("--radarr-base-url", "http://example.com/radarr"),
        ("--radarr-api-key", "7ce909a20a634f7cbe245e1893865ee8"),
    )
    for flag, example in option_specs:
        # --plex-token falls back to PLEX_TOKEN, and so on
        env_name = flag.lstrip("-").replace("-", "_").upper()
        parser.add_argument(flag, type=str, nargs="?", default=os.getenv(env_name), metavar=example)

    args = parser.parse_args()

    # each service needs both of its settings or neither; every service has
    # its own exit code
    paired_settings = (
        (args.plex_base_url, args.plex_token, "if refreshing Plex, both the base url & token must be set", 1),
        (args.sonarr_base_url, args.sonarr_api_key, "if refreshing sonarr, both the base url & API key must be set", 2),
        (args.radarr_base_url, args.radarr_api_key, "if refreshing radarr, both the base url & API key must be set", 3),
    )
    for base_url, credential, complaint, exit_code in paired_settings:
        if bool(base_url) != bool(credential):
            print(complaint, file=sys.stderr)
            sys.exit(exit_code)

    refreshed_something = False

    if args.plex_base_url:
        refreshed_something = True
        print(f"refreshing all Plex libraries at {args.plex_base_url}")
        plex_client = PlexClient(args.plex_base_url, args.plex_token)
        plex_client.refresh_all_plex_libraries()
        print(f"successfully initiated a refresh of all Plex libraries at {args.plex_base_url}")

    if args.sonarr_base_url:
        refreshed_something = True
        print(f"refreshing sonarr library at {args.sonarr_base_url}")
        refresh_sonarr_library(args.sonarr_base_url, args.sonarr_api_key)
        print(f"successfully initiated a refresh for the sonarr library at {args.sonarr_base_url}")

    if args.radarr_base_url:
        refreshed_something = True
        print(f"refreshing radarr library at {args.radarr_base_url}")
        refresh_radarr_library(args.radarr_base_url, args.radarr_api_key)
        print(f"successfully initiated a refresh for the radarr library at {args.radarr_base_url}")

    # nothing was configured at all: show usage and signal failure
    if not refreshed_something:
        parser.print_help()
        sys.exit(-1)