Source code for stellascript.cli

# stellascript/cli.py

import argparse
import warnings
from typing import Optional, Sequence

from . import config
from .logging_config import get_logger

logger = get_logger(__name__)


[docs]
def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """
    Parses command-line arguments for the Stellascript application.

    This function sets up an ArgumentParser to handle various command-line options
    for transcription, including language, model selection, input file,
    diarization, and audio enhancement. After parsing, the result is passed to
    ``validate_args`` so that incompatible option combinations are rejected.

    Args:
        argv (Optional[Sequence[str]]): The argument strings to parse. When
            ``None`` (the default), argparse reads them from ``sys.argv[1:]``.
            Passing an explicit sequence allows the parser to be driven
            programmatically, for example from tests.

    Returns:
        argparse.Namespace: An object containing the parsed command-line arguments.

    Raises:
        SystemExit: If argparse rejects the arguments (unknown option, invalid
                    choice, bad type) or ``validate_args`` reports an invalid
                    combination.
    """
    parser = argparse.ArgumentParser(
        description="Transcribe audio live from microphone or from a file."
    )
    parser.add_argument(
        "--language",
        type=str,
        choices=["af", "am", "ar", "as", "az", "ba", "be", "bg", "bn", "bo", "br", "bs", "ca", "cs", "cy", "da", "de", "el", "en", "es", "et", "eu", "fa", "fi", "fo", "fr", "gl", "gu", "ha", "haw", "he", "hi", "hr", "ht", "hu", "hy", "id", "is", "it", "ja", "jw", "ka", "kk", "km", "kn", "ko", "la", "lb", "ln", "lo", "lt", "lv", "mg", "mi", "mk", "ml", "mn", "mr", "ms", "mt", "my", "ne", "nl", "nn", "no", "oc", "pa", "pl", "ps", "pt", "ro", "ru", "sa", "sd", "si", "sk", "sl", "sn", "so", "sq", "sr", "su", "sv", "sw", "ta", "te", "tg", "th", "tk", "tl", "tr", "tt", "uk", "ur", "uz", "vi", "yi", "yo", "zh", "yue"],
        default="en",
        help="Language for transcription.",
    )
    parser.add_argument(
        "--model",
        type=str,
        choices=config.MODELS,
        default="small",
        help="Whisper model to use.",
    )
    # Omitting --file leaves it as None, which the validation step treats as live mode.
    parser.add_argument(
        "--file",
        type=str,
        default=None,
        help="Path to a WAV audio file to transcribe.",
    )
    # The validation step compares the parsed value against this default to
    # decide whether the user overrode the threshold.
    parser.add_argument(
        "--threshold",
        type=float,
        default=0.7,
        help="Similarity threshold for speaker identification (used with --diarization cluster).",
    )
    parser.add_argument(
        "--mode",
        type=str,
        choices=["block", "segment", "word"],
        default="block",
        help=(
            "Controls the timestamp granularity and output format. "
            "'block': For readable transcripts with timestamps for large text blocks. "
            "'segment': For subtitles with timestamps for short speech segments. "
            "'word': For detailed analysis with a timestamp for every single word."
        ),
    )
    parser.add_argument(
        "--min-speakers",
        type=int,
        default=None,
        help="Minimum number of speakers (file mode only).",
    )
    parser.add_argument(
        "--max-speakers",
        type=int,
        default=None,
        help="Maximum number of speakers (file mode only).",
    )
    parser.add_argument(
        "--diarization",
        type=str,
        choices=["pyannote", "cluster"],
        default="pyannote",
        help="Speaker diarization method.",
    )
    parser.add_argument(
        "--enhancement",
        type=str,
        choices=["none", "deepfilternet", "demucs"],
        default="none",
        help="Audio enhancement method.",
    )
    parser.add_argument(
        "--save-enhanced-audio",
        action="store_true",
        help="Save the enhanced audio to a new file.",
    )
    parser.add_argument(
        "--save-recorded-audio",
        action="store_true",
        help="Save the raw recorded audio from the microphone to a WAV file.",
    )

    # argv=None makes argparse fall back to sys.argv[1:], preserving the
    # behaviour of the original zero-argument call.
    args = parser.parse_args(argv)

    # Cross-option consistency checks; reports problems through parser.error().
    validate_args(args, parser)
    return args




[docs]
def validate_args(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
    """
    Validates the parsed command-line arguments to ensure they are consistent.

    This function checks for various invalid combinations of arguments, such as:
    - Using speaker count constraints in live mode (no ``--file``).
    - Incompatible diarization and transcription modes.
    - Misuse of the similarity threshold with certain diarization methods.
    - Conflicting arguments for speaker count and similarity threshold.
    - Speaker counts that are not positive, or a minimum that exceeds the maximum.

    A threshold is considered "set by the user" when it differs from the
    parser's registered default for ``threshold``; explicitly passing the
    default value is therefore indistinguishable from omitting the option.

    Args:
        args (argparse.Namespace): The parsed command-line arguments.
        parser (argparse.ArgumentParser): The argument parser, used to report errors.

    Raises:
        SystemExit: If an invalid combination of arguments is found, the program
                    exits with an error message (via ``parser.error``).
    """
    if (args.min_speakers is not None or args.max_speakers is not None) and not args.file:
        parser.error("--min-speakers and --max-speakers can only be used in file mode (--file).")

    # NOTE(review): the message states that only '--mode segment' is compatible,
    # but this condition rejects only 'block', so 'word' is accepted in live
    # cluster mode. Confirm which of the two is intended.
    if not args.file and args.mode == "block" and args.diarization == "cluster":
        parser.error(
            "In live mode, '--diarization cluster' is only compatible with '--mode segment'."
        )

    # The --threshold argument is only used for 'cluster' diarization.
    if args.diarization == "pyannote" and args.threshold != parser.get_default("threshold"):
        parser.error("--threshold cannot be used with --diarization pyannote.")

    # --min-speakers is only for pyannote
    if args.diarization == "cluster" and args.min_speakers is not None:
        parser.error("--min-speakers cannot be used with --diarization cluster.")

    # In cluster mode, threshold and max_speakers are mutually exclusive
    if (
        args.diarization == "cluster"
        and args.max_speakers is not None
        and args.threshold != parser.get_default("threshold")
    ):
        parser.error(
            "--threshold and --max-speakers cannot be used together with --diarization cluster."
        )

    # Sanity checks on the speaker counts themselves. These run last so that
    # every combination rejected above keeps its original error message.
    for flag, count in (
        ("--min-speakers", args.min_speakers),
        ("--max-speakers", args.max_speakers),
    ):
        if count is not None and count < 1:
            parser.error(f"{flag} must be a positive integer.")

    if (
        args.min_speakers is not None
        and args.max_speakers is not None
        and args.min_speakers > args.max_speakers
    ):
        parser.error("--min-speakers cannot be greater than --max-speakers.")