diff --git a/media-duplicates b/media-duplicates
new file mode 100755
index 0000000..02b4535
--- /dev/null
+++ b/media-duplicates
@@ -0,0 +1,227 @@
+#!/usr/bin/python3
+from concurrent.futures import ThreadPoolExecutor
+from typing import Dict, List, Tuple
+import argparse
+import json
+import os
+import random
+import requests
+import shlex
+import sqlite3
+import subprocess
+import sys
+
+
+def ffprobe_file_path(file_path: str):
+    result = subprocess.run(
+        ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", file_path],
+        capture_output=True,
+    )
+
+    result.check_returncode()
+
+    return json.loads(result.stdout)
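+
+
+# The helpers below only look at a handful of fields in the ffprobe JSON
+# returned above. An abridged example of that shape (values are illustrative,
+# not taken from a real probe):
+#
+#   {
+#     "streams": [
+#       {"codec_type": "video", "codec_name": "hevc", "width": 1920, "height": 1080}
+#     ],
+#     "format": {"bit_rate": "4500000"}
+#   }
+#
+# Note that ffprobe typically reports "bit_rate" as a string, which is why it
+# is cast to an int before the numeric comparison.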
AS ?", (f"file:{sonarr_db_path2}?mode=ro", "db2")) + + rows = db.execute(""" + SELECT s1.Path || '/' || ef1.RelativePath, s2.Path || '/' || ef2.RelativePath + FROM db1.Series AS s1 + JOIN db2.Series AS s2 ON s1.ImdbId = s2.ImdbId + JOIN db1.Episodes AS e1 ON e1.SeriesId = s1.Id + JOIN db2.Episodes AS e2 ON e2.SeriesId = s2.Id AND e2.SeasonNumber = e1.SeasonNumber AND e2.EpisodeNumber = e1.EpisodeNumber + JOIN db1.EpisodeFiles AS ef1 ON e1.EpisodeFileId = ef1.Id + JOIN db2.EpisodeFiles AS ef2 ON e2.EpisodeFileId = ef2.Id + """).fetchall() + + for path1, path2 in rows: + result.append((path1, path2)) + + return result + +def rewrite_path(path: str, rewrites: Dict[str, str]) -> str: + if rewrites: + for orig, rewrite in rewrites.items(): + if path.startswith(orig): + return path.replace(orig, rewrite, 1) + + return path + +def rejigger_rewrite_paths(rewrite_paths: List[str]) -> Dict[str, str]: + result = {} + + for rewrite_path in rewrite_paths: + orig, rewrite = rewrite_path.split(sep=':', maxsplit=1) + result[orig] = rewrite + + return result + +def compare_media_file_paths(path1: str, path2: str): + try: + stat1 = os.lstat(path1) + stat2 = os.lstat(path2) + if stat1.st_ino == stat2.st_ino and stat1.st_dev == stat2.st_dev: + return None + except Exception as e: + print(f"failed to check inodes for {path1} and {path2}: {e}", file=sys.stderr) + return None + + try: + return compare_ffprobe_output(ffprobe_file_path(path1), ffprobe_file_path(path2)) + except Exception as e: + print(f"failed to compare ffprobe output for {path1} and {path2}: {e}", file=sys.stderr) + return None + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="radarr & sonarr duplicate checker across two database files", + ) + + parser.add_argument('--sonarr-db-paths', type=argparse.FileType('r'), nargs=2, metavar=("/home/sonarr/sonarr.db", "/home/othersonarr/sonarr.db"), required=True) + parser.add_argument('--radarr-db-paths', type=argparse.FileType('r'), nargs=2, metavar=("/home/radarr/radarr.db", "/home/otherradarr/radarr.db"), required=True) + parser.add_argument('--path-prefix-rewrite', type=str, nargs='+', metavar=("/media:/mnt/media", "/data:/real/path/data"), help="rewrite the paths for internal database media paths to the 'real' destinations") + parser.add_argument('--command-output-file', type=argparse.FileType('w'), nargs='?', default=sys.stdout) + parser.add_argument('--slack-webhook-url', type=str, nargs='?', default=None) + args = parser.parse_args() + + for file_descriptor in args.sonarr_db_paths + args.radarr_db_paths: + file_descriptor.close() + + duplicate_paths = compare_sonarr(args.sonarr_db_paths[0].name, args.sonarr_db_paths[1].name) + compare_radarr(args.radarr_db_paths[0].name, args.radarr_db_paths[1].name) + + if args.path_prefix_rewrite: + rewrite_paths = rejigger_rewrite_paths(args.path_prefix_rewrite) + duplicate_paths = [(rewrite_path(path1, rewrite_paths), rewrite_path(path2, rewrite_paths)) for path1, path2 in duplicate_paths] + + already_saved_space_in_bytes = 0 + space_savings_in_bytes = 0 + files_to_link = 0 + files_already_linked = 0 + + # createa a process pool for NumCPUs * 2 + # this is really just used for `ffprobe` concurrent calls + with ThreadPoolExecutor(max_workers=len(os.sched_getaffinity(0))*2) as executor: + def map_helper(args): + return compare_media_file_paths(args[0], args[1]) + + for (path1, path2), probe_result in zip(duplicate_paths, executor.map(map_helper, duplicate_paths)): + if probe_result is None: + files_already_linked += 1 + 
+
+
+# returns None when the two paths are already the same file (hard linked) or
+# when they cannot be compared; otherwise returns the compare_ffprobe_output result
+def compare_media_file_paths(path1: str, path2: str):
+    try:
+        stat1 = os.lstat(path1)
+        stat2 = os.lstat(path2)
+        if stat1.st_ino == stat2.st_ino and stat1.st_dev == stat2.st_dev:
+            return None
+    except Exception as e:
+        print(f"failed to check inodes for {path1} and {path2}: {e}", file=sys.stderr)
+        return None
+
+    try:
+        return compare_ffprobe_output(ffprobe_file_path(path1), ffprobe_file_path(path2))
+    except Exception as e:
+        print(f"failed to compare ffprobe output for {path1} and {path2}: {e}", file=sys.stderr)
+        return None
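+
+
+# Example invocation (all paths below are illustrative):
+#
+#   ./media-duplicates \
+#       --sonarr-db-paths /home/sonarr/sonarr.db /home/othersonarr/sonarr.db \
+#       --radarr-db-paths /home/radarr/radarr.db /home/otherradarr/radarr.db \
+#       --path-prefix-rewrite /media:/mnt/media \
+#       --command-output-file dedupe.sh
+#
+# The generated dedupe.sh is a plain shell script along the lines of:
+#
+#   # space savings of 4.20 GiB
+#   rm -f '/mnt/media/Movies/Some Movie/Some.Movie.1080p.mkv'
+#   ln -f '/mnt/media/other/Movies/Some Movie/Some.Movie.2160p.mkv' '/mnt/media/Movies/Some Movie/Some.Movie.2160p.mkv'
+#
+# Nothing is deleted or linked until that script is reviewed and run by hand.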
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description="radarr & sonarr duplicate checker across two database files",
+    )
+
+    parser.add_argument('--sonarr-db-paths', type=argparse.FileType('r'), nargs=2, metavar=("/home/sonarr/sonarr.db", "/home/othersonarr/sonarr.db"), required=True)
+    parser.add_argument('--radarr-db-paths', type=argparse.FileType('r'), nargs=2, metavar=("/home/radarr/radarr.db", "/home/otherradarr/radarr.db"), required=True)
+    parser.add_argument('--path-prefix-rewrite', type=str, nargs='+', metavar=("/media:/mnt/media", "/data:/real/path/data"), help="rewrite internal database media path prefixes to their 'real' destinations")
+    parser.add_argument('--command-output-file', type=argparse.FileType('w'), nargs='?', default=sys.stdout)
+    parser.add_argument('--slack-webhook-url', type=str, nargs='?', default=None)
+    args = parser.parse_args()
+
+    # argparse opened these just to validate they exist and are readable; only the paths are needed below
+    for file_descriptor in args.sonarr_db_paths + args.radarr_db_paths:
+        file_descriptor.close()
+
+    duplicate_paths = compare_sonarr(args.sonarr_db_paths[0].name, args.sonarr_db_paths[1].name) + compare_radarr(args.radarr_db_paths[0].name, args.radarr_db_paths[1].name)
+
+    if args.path_prefix_rewrite:
+        rewrite_paths = rejigger_rewrite_paths(args.path_prefix_rewrite)
+        duplicate_paths = [(rewrite_path(path1, rewrite_paths), rewrite_path(path2, rewrite_paths)) for path1, path2 in duplicate_paths]
+
+    already_saved_space_in_bytes = 0
+    space_savings_in_bytes = 0
+    files_to_link = 0
+    files_already_linked = 0
+
+    # create a thread pool with NumCPUs * 2 workers
+    # this is really just used for concurrent `ffprobe` calls
+    with ThreadPoolExecutor(max_workers=len(os.sched_getaffinity(0))*2) as executor:
+        def map_helper(paths):
+            return compare_media_file_paths(paths[0], paths[1])
+
+        for (path1, path2), probe_result in zip(duplicate_paths, executor.map(map_helper, duplicate_paths)):
+            if probe_result is None:
+                files_already_linked += 1
+                already_saved_space_in_bytes += os.lstat(path1).st_size
+                continue
+
+            if probe_result == 0:
+                # if the results seem equal, let us flip a figurative coin for which path to overwrite
+                print("# this is the result of a coin flip", file=args.command_output_file)
+                path1, path2 = random.sample([path1, path2], k=2)
+            elif probe_result < 0:
+                # path2 is better than path1, so swap them: path1 is always the keeper
+                path1, path2 = path2, path1
+
+            file_size_in_bytes = os.lstat(path2).st_size
+            space_savings_in_bytes += file_size_in_bytes
+            files_to_link += 1
+
+            print(f"# space savings of {file_size_in_bytes/(1024**3):.2f} GiB", file=args.command_output_file)
+            print(f"rm -f {shlex.quote(path2)}", file=args.command_output_file)
+
+            # replace path2's file name with path1's file name
+            path2 = os.path.join(os.path.dirname(path2), os.path.basename(path1))
+            print(f"ln -f {shlex.quote(path1)} {shlex.quote(path2)}", file=args.command_output_file)
+
+    if args.slack_webhook_url:
+        message = "nothing to link for file deduplication"
+        if files_to_link > 0:
+            message = f"HOLY SMOKES! We can save {space_savings_in_bytes/(1024**3):.2f} GiB by hard linking {files_to_link} files"
+
+        message += f"\nWe already saved {already_saved_space_in_bytes/(1024**3):.2f} GiB by hard linking {files_already_linked} files"
+
+        requests.post(
+            args.slack_webhook_url,
+            headers={
+                'Content-type': 'application/json',
+            },
+            data=json.dumps({
+                "username": "Apollo Media Duplicates",
+                "icon_emoji": ":floppy_disk:",
+                "text": message,
+            }),
+        ).raise_for_status()
diff --git a/refresh-libraries b/refresh-libraries
new file mode 100755
index 0000000..6307edd
--- /dev/null
+++ b/refresh-libraries
@@ -0,0 +1,124 @@
+#!/usr/bin/python3
+from typing import List
+from urllib.parse import urljoin
+import argparse
+import json
+import os
+import requests
+import sys
+import xml.etree.ElementTree as xml
+
+
+def refresh_radarr_library(base_url: str, api_key: str) -> None:
+    requests.post(
+        urljoin(f"{base_url}/", "api/v3/command"),
+        params={
+            "apiKey": api_key,
+        },
+        headers={
+            "Content-Type": "application/json",
+        },
+        data=json.dumps({
+            "name": "RefreshMovie",
+        }),
+    ).raise_for_status()
+
+
+def refresh_sonarr_library(base_url: str, api_key: str) -> None:
+    requests.post(
+        urljoin(f"{base_url}/", "api/command"),
+        params={
+            "apikey": api_key,
+        },
+        headers={
+            "Content-Type": "application/json",
+        },
+        data=json.dumps({
+            "name": "RefreshSeries",
+        }),
+    ).raise_for_status()
+
+
+class PlexClient:
+    def __init__(self, plex_base_url: str, plex_token: str):
+        self.__plex_base_url = plex_base_url
+        self.__plex_token = plex_token
+
+    def __create_plex_request(self, relative_path: str) -> str:
+        response = requests.get(
+            urljoin(f"{self.__plex_base_url}/", relative_path),
+            timeout=3,
+            headers={
+                "X-Plex-Token": self.__plex_token,
+            },
+        )
+
+        response.raise_for_status()
+
+        return response.text
+
+    def get_plex_library_sections(self) -> List[str]:
+        sections = xml.fromstring(self.__create_plex_request("library/sections"))
+
+        return [section.attrib["key"] for section in sections]
+
+    def refresh_plex_library_section(self, section_id: str) -> None:
+        self.__create_plex_request(f"library/sections/{section_id}/refresh")
+
+    def refresh_all_plex_libraries(self) -> None:
+        for section_id in self.get_plex_library_sections():
+            self.refresh_plex_library_section(section_id)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description="Radarr, Sonarr, & Plex library refresher",
+        epilog="Every argument also falls back to the environment variable of the same name in capitalized snake case when it is not set. For example: if --plex-token isn't set, the PLEX_TOKEN environment variable is read instead",
+    )
+
+    parser.add_argument("--plex-base-url", type=str, nargs="?", default=os.getenv("PLEX_BASE_URL"), metavar="http://example.com:32400")
+    parser.add_argument("--plex-token", type=str, nargs="?", default=os.getenv("PLEX_TOKEN"), metavar="Z99EuQ9Xy9G9z9PQFl99")
+    parser.add_argument("--sonarr-base-url", type=str, nargs="?", default=os.getenv("SONARR_BASE_URL"), metavar="http://example.com/sonarr")
+    parser.add_argument("--sonarr-api-key", type=str, nargs="?", default=os.getenv("SONARR_API_KEY"), metavar="6ce909a20a634f7cbe245e1893865ee5")
+    parser.add_argument("--radarr-base-url", type=str, nargs="?", default=os.getenv("RADARR_BASE_URL"), metavar="http://example.com/radarr")
+    parser.add_argument("--radarr-api-key", type=str, nargs="?", default=os.getenv("RADARR_API_KEY"), metavar="7ce909a20a634f7cbe245e1893865ee8")
+
+    args = parser.parse_args()
+
+    print_help = True
+
+    if bool(args.plex_base_url) != bool(args.plex_token):
+        print("if refreshing Plex, both the base url & token must be set", file=sys.stderr)
+        sys.exit(1)
+
+    if bool(args.sonarr_base_url) != bool(args.sonarr_api_key):
+        print("if refreshing sonarr, both the base url & API key must be set", file=sys.stderr)
+        sys.exit(2)
+
+    if bool(args.radarr_base_url) != bool(args.radarr_api_key):
+        print("if refreshing radarr, both the base url & API key must be set", file=sys.stderr)
+        sys.exit(3)
+
+    if args.plex_base_url:
+        print_help = False
+        print(f"refreshing all Plex libraries at {args.plex_base_url}")
+        PlexClient(args.plex_base_url, args.plex_token).refresh_all_plex_libraries()
+        print(f"successfully initiated a refresh of all Plex libraries at {args.plex_base_url}")
+
+    if args.sonarr_base_url:
+        print_help = False
+        print(f"refreshing sonarr library at {args.sonarr_base_url}")
+        refresh_sonarr_library(args.sonarr_base_url, args.sonarr_api_key)
+        print(f"successfully initiated a refresh for the sonarr library at {args.sonarr_base_url}")
+
+    if args.radarr_base_url:
+        print_help = False
+        print(f"refreshing radarr library at {args.radarr_base_url}")
+        refresh_radarr_library(args.radarr_base_url, args.radarr_api_key)
+        print(f"successfully initiated a refresh for the radarr library at {args.radarr_base_url}")
+
+    if print_help:
+        parser.print_help()
+        sys.exit(-1)
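+
+
+# Example invocation (URLs and keys below are placeholders mirroring the
+# metavar examples above, not real credentials):
+#
+#   PLEX_BASE_URL=http://example.com:32400 \
+#   PLEX_TOKEN=Z99EuQ9Xy9G9z9PQFl99 \
+#   SONARR_BASE_URL=http://example.com/sonarr \
+#   SONARR_API_KEY=6ce909a20a634f7cbe245e1893865ee5 \
+#   ./refresh-libraries
+#
+# Any service whose base url/credential pair is left unset is skipped; if no
+# settings are provided at all, the help text is printed and the script exits non-zero.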