Add media management scripts

2021-07-09 13:04:58 -04:00 · 2021-07-09 13:04:58 -04:00 · f80411393c
commit f80411393c
parent c591770969
2 changed files with 351 additions and 0 deletions
--- a/227
+++ b/227
@ -0,0 +1,227 @@
+#!/usr/bin/python3
+from concurrent.futures import ThreadPoolExecutor
+from typing import Dict, List, Tuple
+import argparse
+import json
+import os
+import random
+import requests
+import shlex
+import sqlite3
+import subprocess
+import sys
+
+def ffprobe_file_path(file_path: str):
+    result = subprocess.run(
+        ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", file_path],
+        capture_output=True,
+    )
+
+    result.check_returncode()
+
+    return json.loads(result.stdout)
+
+
+def get_first_video_stream(ffprobe_info):
+    for stream in ffprobe_info["streams"]:
+        if stream["codec_type"] == "video":
+            return stream
+
+    raise Exception("there is no video stream")
+
+
+def codec_name_is_h265(codec_name: str) -> bool:
+    return codec_name.find("265") or codec_name.find("HEVC") or codec_name.find("hevc")
+
+
+# returns less than 0 if first is worse than second
+# returns greater than 0 if first is better than second
+# returns 0 if they are roughly the same
+def compare_ffprobe_output(first, second) -> int:
+    first_stream = get_first_video_stream(first)
+    second_stream = get_first_video_stream(second)
+
+    # whichever has the most surface area, aka, highest resolution wins by default
+    first_area = first_stream["width"] * first_stream["height"]
+    second_area = second_stream["width"] * second_stream["height"]
+    if first_area > second_area:
+        return 1
+    if second_area > first_area:
+        return -1
+
+    # if resolutions are the same, prefer h265
+    first_is_h265 = codec_name_is_h265(first_stream["codec_name"])
+    second_is_h265 = codec_name_is_h265(second_stream["codec_name"])
+
+    if first_is_h265:
+        if not second_is_h265:
+            return 1
+    elif second_is_h265:
+        return -1
+
+    # if both are h265 or both are not h265, use bitrate
+    first_bitrate = first["format"]["bit_rate"]
+    second_bitrate = second["format"]["bit_rate"]
+
+    if first_bitrate > second_bitrate:
+        return 1
+
+    if second_bitrate > first_bitrate:
+        return -1
+
+    # at this point, I consider these files to be the same
+    return 0
+
+
+def compare_radarr(radarr_db_path1: str, radarr_db_path2: str) -> List[Tuple[str, str]]:
+    result = []
+
+    with sqlite3.connect(":memory:", uri=True) as db:
+        db.execute("ATTACH DATABASE ? AS ?", (f"file:{radarr_db_path1}?mode=ro", "db1"))
+        db.execute("ATTACH DATABASE ? AS ?", (f"file:{radarr_db_path2}?mode=ro", "db2"))
+
+        rows = db.execute("""
+            SELECT m1.Path || '/' || mf1.RelativePath, m2.Path || '/' || mf2.RelativePath
+                FROM db1.Movies     AS m1
+                JOIN db2.Movies     AS m2  ON m1.ImdbId = m2.ImdbId
+                JOIN db1.MovieFiles AS mf1 ON m1.MovieFileId = mf1.Id
+                JOIN db2.MovieFiles AS mf2 ON m2.MovieFileId = mf2.Id
+        """).fetchall()
+
+        for path1, path2 in rows:
+            result.append((path1, path2))
+
+    return result
+
+
+def compare_sonarr(sonarr_db_path1: str, sonarr_db_path2: str) -> List[Tuple[str, str]]:
+    result = []
+
+    with sqlite3.connect(":memory:", uri=True) as db:
+        db.execute("ATTACH DATABASE ? AS ?", (f"file:{sonarr_db_path1}?mode=ro", "db1"))
+        db.execute("ATTACH DATABASE ? AS ?", (f"file:{sonarr_db_path2}?mode=ro", "db2"))
+
+        rows = db.execute("""
+            SELECT s1.Path || '/' || ef1.RelativePath, s2.Path || '/' || ef2.RelativePath
+                FROM db1.Series       AS s1
+                JOIN db2.Series       AS s2  ON s1.ImdbId        = s2.ImdbId
+                JOIN db1.Episodes     AS e1  ON e1.SeriesId      = s1.Id
+                JOIN db2.Episodes     AS e2  ON e2.SeriesId      = s2.Id AND e2.SeasonNumber = e1.SeasonNumber AND e2.EpisodeNumber = e1.EpisodeNumber
+                JOIN db1.EpisodeFiles AS ef1 ON e1.EpisodeFileId = ef1.Id
+                JOIN db2.EpisodeFiles AS ef2 ON e2.EpisodeFileId = ef2.Id
+        """).fetchall()
+
+        for path1, path2 in rows:
+            result.append((path1, path2))
+
+    return result
+
+def rewrite_path(path: str, rewrites: Dict[str, str]) -> str:
+    if rewrites:
+        for orig, rewrite in rewrites.items():
+            if path.startswith(orig):
+                return path.replace(orig, rewrite, 1)
+
+    return path
+
+def rejigger_rewrite_paths(rewrite_paths: List[str]) -> Dict[str, str]:
+    result = {}
+
+    for rewrite_path in rewrite_paths:
+        orig, rewrite = rewrite_path.split(sep=':', maxsplit=1)
+        result[orig] = rewrite
+
+    return result
+
+def compare_media_file_paths(path1: str, path2: str):
+    try:
+        stat1 = os.lstat(path1)
+        stat2 = os.lstat(path2)
+        if stat1.st_ino == stat2.st_ino and stat1.st_dev == stat2.st_dev:
+            return None
+    except Exception as e:
+        print(f"failed to check inodes for {path1} and {path2}: {e}", file=sys.stderr)
+        return None
+
+    try:
+        return compare_ffprobe_output(ffprobe_file_path(path1), ffprobe_file_path(path2))
+    except Exception as e:
+        print(f"failed to compare ffprobe output for {path1} and {path2}: {e}", file=sys.stderr)
+        return None
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description="radarr & sonarr duplicate checker across two database files",
+    )
+
+    parser.add_argument('--sonarr-db-paths', type=argparse.FileType('r'), nargs=2, metavar=("/home/sonarr/sonarr.db", "/home/othersonarr/sonarr.db"), required=True)
+    parser.add_argument('--radarr-db-paths', type=argparse.FileType('r'), nargs=2, metavar=("/home/radarr/radarr.db", "/home/otherradarr/radarr.db"), required=True)
+    parser.add_argument('--path-prefix-rewrite', type=str, nargs='+', metavar=("/media:/mnt/media", "/data:/real/path/data"), help="rewrite the paths for internal database media paths to the 'real' destinations")
+    parser.add_argument('--command-output-file', type=argparse.FileType('w'), nargs='?', default=sys.stdout)
+    parser.add_argument('--slack-webhook-url', type=str, nargs='?', default=None)
+    args = parser.parse_args()
+
+    for file_descriptor in args.sonarr_db_paths + args.radarr_db_paths:
+        file_descriptor.close()
+
+    duplicate_paths = compare_sonarr(args.sonarr_db_paths[0].name, args.sonarr_db_paths[1].name) + compare_radarr(args.radarr_db_paths[0].name, args.radarr_db_paths[1].name)
+
+    if args.path_prefix_rewrite:
+        rewrite_paths = rejigger_rewrite_paths(args.path_prefix_rewrite)
+        duplicate_paths = [(rewrite_path(path1, rewrite_paths), rewrite_path(path2, rewrite_paths)) for path1, path2 in duplicate_paths]
+
+    already_saved_space_in_bytes = 0
+    space_savings_in_bytes = 0
+    files_to_link = 0
+    files_already_linked = 0
+
+    # createa a process pool for NumCPUs * 2
+    # this is really just used for `ffprobe` concurrent calls
+    with ThreadPoolExecutor(max_workers=len(os.sched_getaffinity(0))*2) as executor:
+        def map_helper(args):
+            return compare_media_file_paths(args[0], args[1])
+
+        for (path1, path2), probe_result in zip(duplicate_paths, executor.map(map_helper, duplicate_paths)):
+            if probe_result is None:
+                files_already_linked += 1
+                already_saved_space_in_bytes += os.lstat(path1).st_size
+                continue
+
+            if probe_result == 0:
+                # if the results seem equal, let us flip a figurative coin for which path to overwrite
+                print("# this is the result of a coin flip", file=args.command_output_file)
+                path1, path2 = random.sample([path1, path2], k=2)
+            elif probe_result < 0:
+                # path2 is better than path1
+                path1, path2 = path2, path1
+
+            file_size_in_bytes = os.lstat(path2).st_size
+            space_savings_in_bytes += file_size_in_bytes
+            files_to_link += 1
+
+            print(f"# space savings of {file_size_in_bytes/(1024**3):.2f} GiB", file=args.command_output_file)
+            print(f"rm -f {shlex.quote(path2)}", file=args.command_output_file)
+
+            # replace path2's file name with path1's file name
+            path2 = os.path.join(os.path.dirname(path2), os.path.basename(path1))
+            print(f"ln -f {shlex.quote(path1)} {shlex.quote(path2)}", file=args.command_output_file)
+
+    if args.slack_webhook_url:
+        message = "nothing to link for file deduplication"
+        if files_to_link > 0:
+            message = f"HOLY SMOKES! We can save {space_savings_in_bytes/(1024**3):.2f} GiB by hard linking {files_to_link} files"
+
+        message += f"\nWe already save {already_saved_space_in_bytes/(1024**3):.2f} GiB by hard linking {files_already_linked} files"
+
+        requests.post(
+            args.slack_webhook_url,
+            headers={
+                'Content-type': 'application/json',
+            },
+            data=json.dumps({
+                "username": "Apollo Media Duplicates",
+                "icon_emoji": ":floppy_disk:",
+                "text": message,
+            }),
+        ).raise_for_status()
--- a/124
+++ b/124
@ -0,0 +1,124 @@
+#!/usr/bin/python3
+from typing import List
+from urllib.parse import urljoin
+import argparse
+import json
+import os
+import requests
+import sys
+import xml.etree.ElementTree as xml
+
+def refresh_radarr_library(base_url: str, api_key: str) -> None:
+    requests.post(
+        urljoin(f"{base_url}/", "api/v3/command"),
+        params={
+            "apiKey": api_key,
+        },
+        headers={
+            "Content-Type": "application/json",
+        },
+        data=json.dumps({
+            "name": "RefreshMovie",
+        }),
+    ).raise_for_status()
+
+
+def refresh_sonarr_library(base_url: str, api_key: str) -> None:
+    requests.post(
+        urljoin(f"{base_url}/", "api/command"),
+        params={
+            "apikey": api_key,
+        },
+        headers={
+            "Content-Type": "application/json",
+        },
+        data=json.dumps({
+            "name": "RefreshSeries",
+        }),
+    ).raise_for_status()
+
+
+class PlexClient:
+    def __init__(self, plex_base_url: str, plex_token: str):
+        self.__plex_base_url = plex_base_url
+        self.__plex_token = plex_token
+
+
+    def __create_plex_request(self, relative_path: str) -> str:
+        response = requests.get(
+            urljoin(f"{self.__plex_base_url}/", relative_path),
+            timeout=3,
+            headers={
+                "X-Plex-Token": self.__plex_token,
+            },
+        )
+
+        response.raise_for_status()
+
+        return response.text
+
+
+    def get_plex_library_sections(self) -> List[str]:
+        sections = xml.fromstring(self.__create_plex_request("library/sections"))
+
+        return [section.attrib["key"] for section in sections]
+
+
+    def refresh_plex_library_section(self, section_id: str) -> None:
+        self.__create_plex_request(f"library/sections/{section_id}/refresh")
+
+
+    def refresh_all_plex_libraries(self) -> None:
+        for section_id in self.get_plex_library_sections():
+            self.refresh_plex_library_section(section_id)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description="Radarr, Sonarr, & Plex library refresher",
+        epilog="All arguments will also fallback to environment variable settings of their name in capitalized snake case if not set. For example: if --plex-token isn't set, it will try to read the PLEX_TOKEN environment variable value",
+    )
+
+    parser.add_argument("--plex-base-url", type=str, nargs="?", default=os.getenv("PLEX_BASE_URL"), metavar="http://example.com:32400")
+    parser.add_argument("--plex-token", type=str, nargs="?", default=os.getenv("PLEX_TOKEN"), metavar="Z99EuQ9Xy9G9z9PQFl99")
+    parser.add_argument("--sonarr-base-url", type=str, nargs="?", default=os.getenv("SONARR_BASE_URL"), metavar="http://example.com/sonarr")
+    parser.add_argument("--sonarr-api-key", type=str, nargs="?", default=os.getenv("SONARR_API_KEY"), metavar="6ce909a20a634f7cbe245e1893865ee5")
+    parser.add_argument("--radarr-base-url", type=str, nargs="?", default=os.getenv("RADARR_BASE_URL"), metavar="http://example.com/radarr")
+    parser.add_argument("--radarr-api-key", type=str, nargs="?", default=os.getenv("RADARR_API_KEY"), metavar="7ce909a20a634f7cbe245e1893865ee8")
+
+    args = parser.parse_args()
+
+    print_help = True
+
+    if bool(args.plex_base_url) != bool(args.plex_token):
+        print("if refreshing Plex, both the base url & token must be set", file=sys.stderr)
+        sys.exit(1)
+
+    if bool(args.sonarr_base_url) != bool(args.sonarr_api_key):
+        print("if refreshing sonarr, both the base url & API key must be set", file=sys.stderr)
+        sys.exit(2)
+
+    if bool(args.radarr_base_url) != bool(args.radarr_api_key):
+        print("if refreshing radarr, both the base url & API key must be set", file=sys.stderr)
+        sys.exit(3)
+
+    if args.plex_base_url:
+        print_help = False
+        print(f"refreshing all Plex libraries at {args.plex_base_url}")
+        PlexClient(args.plex_base_url, args.plex_token).refresh_all_plex_libraries()
+        print(f"successfully initiated a refresh of all Plex libraries at {args.plex_base_url}")
+
+    if args.sonarr_base_url:
+        print_help = False
+        print(f"refreshing sonarr library at {args.sonarr_base_url}")
+        refresh_sonarr_library(args.sonarr_base_url, args.sonarr_api_key)
+        print(f"successfully initiated a refresh for the sonarr library at {args.sonarr_base_url}")
+
+    if args.radarr_base_url:
+        print_help = False
+        print(f"refreshing radarr library at {args.radarr_base_url}")
+        refresh_radarr_library(args.radarr_base_url, args.radarr_api_key)
+        print(f"successfully initiated a refresh for the radarr library at {args.radarr_base_url}")
+
+    if print_help:
+        parser.print_help()
+        sys.exit(-1)