Source code for stellascript.cli

# stellascript/cli.py

import argparse
import warnings
from typing import Optional, Sequence

from . import config
from .logging_config import get_logger

logger = get_logger(__name__)

def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """
    Parses command-line arguments for the Stellascript application.

    This function sets up an ArgumentParser to handle the command-line options
    for transcription (language, model, input file, timestamp mode, speaker
    diarization and audio enhancement), parses the arguments, and then runs
    ``validate_args`` to reject incompatible option combinations.

    Args:
        argv (Optional[Sequence[str]]): Argument strings to parse. When None
            (the default), argparse falls back to ``sys.argv[1:]``, which is
            the behavior existing callers rely on. Passing an explicit list
            allows the parser to be driven programmatically or from tests.

    Returns:
        argparse.Namespace: An object containing the parsed command-line arguments.

    Raises:
        SystemExit: If argparse rejects the arguments or ``validate_args``
            reports an invalid combination.
    """
    supported_languages = [
        "af", "am", "ar", "as", "az", "ba", "be", "bg", "bn", "bo", "br", "bs",
        "ca", "cs", "cy", "da", "de", "el", "en", "es", "et", "eu", "fa", "fi",
        "fo", "fr", "gl", "gu", "ha", "haw", "he", "hi", "hr", "ht", "hu", "hy",
        "id", "is", "it", "ja", "jw", "ka", "kk", "km", "kn", "ko", "la", "lb",
        "ln", "lo", "lt", "lv", "mg", "mi", "mk", "ml", "mn", "mr", "ms", "mt",
        "my", "ne", "nl", "nn", "no", "oc", "pa", "pl", "ps", "pt", "ro", "ru",
        "sa", "sd", "si", "sk", "sl", "sn", "so", "sq", "sr", "su", "sv", "sw",
        "ta", "te", "tg", "th", "tk", "tl", "tr", "tt", "uk", "ur", "uz", "vi",
        "yi", "yo", "zh", "yue",
    ]

    parser = argparse.ArgumentParser(
        description="Transcribe audio live from microphone or from a file."
    )
    parser.add_argument(
        "--language",
        type=str,
        choices=supported_languages,
        default="en",
        help="Language for transcription.",
    )
    parser.add_argument(
        "--model",
        type=str,
        choices=config.MODELS,
        default="small",
        help="Whisper model to use.",
    )
    parser.add_argument(
        "--file",
        type=str,
        default=None,
        help="Path to a WAV audio file to transcribe.",
    )
    parser.add_argument(
        "--threshold",
        type=float,
        default=0.7,
        help="Similarity threshold for speaker identification (used with --diarization cluster).",
    )
    parser.add_argument(
        "--mode",
        type=str,
        choices=["block", "segment", "word"],
        default="block",
        help=(
            "Controls the timestamp granularity and output format. "
            "'block': For readable transcripts with timestamps for large text blocks. "
            "'segment': For subtitles with timestamps for short speech segments. \n"
            "'word': For detailed analysis with a timestamp for every single word."
        ),
    )
    parser.add_argument(
        "--min-speakers",
        type=int,
        default=None,
        help="Minimum number of speakers (file mode only).",
    )
    parser.add_argument(
        "--max-speakers",
        type=int,
        default=None,
        help="Maximum number of speakers (file mode only).",
    )
    parser.add_argument(
        "--diarization",
        type=str,
        choices=["pyannote", "cluster"],
        default="pyannote",
        help="Speaker diarization method.",
    )
    parser.add_argument(
        "--enhancement",
        type=str,
        choices=["none", "deepfilternet", "demucs"],
        default="none",
        help="Audio enhancement method.",
    )
    parser.add_argument(
        "--save-enhanced-audio",
        action="store_true",
        help="Save the enhanced audio to a new file.",
    )
    parser.add_argument(
        "--save-recorded-audio",
        action="store_true",
        help="Save the raw recorded audio from the microphone to a WAV file.",
    )

    # argv=None makes argparse read sys.argv[1:], preserving the original behavior.
    args = parser.parse_args(argv)
    # The parser is handed over so validation failures are reported through
    # parser.error(), i.e. with the usual usage banner and exit status.
    validate_args(args, parser)
    return args
def validate_args(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
    """
    Validates the parsed command-line arguments to ensure they are consistent.

    This function checks for various invalid combinations of arguments, such as:
    - Using speaker count constraints in live mode (no --file).
    - Using '--diarization cluster' in live mode with any '--mode' other than
      'segment'.
    - Using --threshold with '--diarization pyannote'.
    - Using --min-speakers with '--diarization cluster'.
    - Combining --threshold and --max-speakers with '--diarization cluster'.
    - Non-positive speaker counts, or --min-speakers greater than --max-speakers.

    Args:
        args (argparse.Namespace): The parsed command-line arguments.
        parser (argparse.ArgumentParser): The argument parser, used to report
            errors and to look up the default value of --threshold.

    Raises:
        SystemExit: If an invalid combination of arguments is found, the
            program exits with an error message (via ``parser.error``).
    """
    live_mode = not args.file
    # NOTE: "was --threshold supplied?" is inferred by comparing against the
    # parser default, so an explicit value equal to the default is
    # indistinguishable from omitting the option.
    threshold_overridden = args.threshold != parser.get_default("threshold")

    if live_mode and (args.min_speakers is not None or args.max_speakers is not None):
        parser.error("--min-speakers and --max-speakers can only be used in file mode (--file).")

    # The message states that only 'segment' is supported here, so every other
    # mode (including 'word') must be rejected, not just 'block'.
    if live_mode and args.diarization == "cluster" and args.mode != "segment":
        parser.error(
            "In live mode, '--diarization cluster' is only compatible with '--mode segment'."
        )

    # The --threshold argument is only used for 'cluster' diarization.
    if args.diarization == "pyannote" and threshold_overridden:
        parser.error("--threshold cannot be used with --diarization pyannote.")

    # --min-speakers is only for pyannote.
    if args.diarization == "cluster" and args.min_speakers is not None:
        parser.error("--min-speakers cannot be used with --diarization cluster.")

    # In cluster mode, threshold and max_speakers are mutually exclusive.
    if args.diarization == "cluster" and args.max_speakers is not None and threshold_overridden:
        parser.error(
            "--threshold and --max-speakers cannot be used together with --diarization cluster."
        )

    # Sanity checks on the speaker counts themselves. They run last so that
    # every combination rejected above keeps its original error message.
    if args.min_speakers is not None and args.min_speakers < 1:
        parser.error("--min-speakers must be at least 1.")
    if args.max_speakers is not None and args.max_speakers < 1:
        parser.error("--max-speakers must be at least 1.")
    if (
        args.min_speakers is not None
        and args.max_speakers is not None
        and args.min_speakers > args.max_speakers
    ):
        parser.error("--min-speakers cannot be greater than --max-speakers.")