SRT 자막 파일을 SAMI 파일로 바꾸는 방법, python source code (동영상 자막 변환, 비디오 자막 포맷 변환, 자막 파일 포맷 변경 방법)

//

"""
Convert SRT file to SMI file
"""
import os
import argparse
import pysrt
from bs4 import BeautifulSoup
import re
import chardet


def get_encoding(file_path):
    """Guess the character encoding of the file at ``file_path``.

    The whole file is read in binary mode and passed to ``chardet.detect``;
    the value stored under the ``"encoding"`` key of its result is returned.
    """
    with open(file_path, "rb") as stream:
        raw_bytes = stream.read()
        detection = chardet.detect(raw_bytes)
    return detection["encoding"]


def change_file_extension(srt_file_path, new_ext="smi"):
    """Return ``srt_file_path`` with its extension swapped for ``new_ext``.

    ``new_ext`` is given without the leading dot. Only the last extension
    (as split by ``os.path.splitext``) is replaced; a path without an
    extension simply gets ``.new_ext`` appended.
    """
    stem = os.path.splitext(srt_file_path)[0]
    return f"{stem}.{new_ext}"


# -----------------------------------------------------------------------------
def convert_srt_to_smi(srt_file_path, lang="ko"):
    """Convert an SRT subtitle file to a SAMI (.smi) file beside it.

    The SRT file is loaded with pysrt using the encoding reported by
    ``get_encoding``. Each subtitle becomes two ``<SYNC>`` entries: one at
    its start time carrying the text, and one at its end time carrying
    ``&nbsp;`` to clear the caption. The result is written as UTF-8 to the
    same path with the extension changed to ``smi``.

    Args:
        srt_file_path: Path to the input file; its extension must be
            ``.srt`` (compared case-insensitively).
        lang: Subtitle language. ``"en"`` (or the legacy value ``"ENCC"``)
            selects the ``ENCC`` paragraph class; anything else, including
            the default ``"ko"``, selects ``KRCC``.

    Returns:
        None. When the extension is not ``.srt`` a message is printed and
        nothing is written.
    """
    root, ext = os.path.splitext(srt_file_path)
    # Case-insensitive so that files named e.g. "MOVIE.SRT" are accepted.
    if ext.lower() != ".srt":
        print("srt 파일이 아닙니다.")
        return

    # Load the SRT file
    encoding = get_encoding(srt_file_path)
    subs = pysrt.open(srt_file_path, encoding=encoding)

    # The CLI passes "en"; the previous check compared against "ENCC", so
    # English was never selected. "ENCC" is still accepted for callers that
    # relied on the old spelling.
    lang_str = "ENCC" if lang in ("en", "ENCC") else "KRCC"

    # Collect the pieces in a list and join once instead of repeated "+=".
    parts = [
        "<SAMI>\n<HEAD>\n"
        '<STYLE TYPE="text/css">\n<!--\n'
        "P { font-family: Arial; font-weight: normal; color: white; "
        "background-color: black; text-align: center; }\n"
        "-->\n</STYLE>\n</HEAD>\n<BODY>\n"
    ]

    for sub in subs:
        start_time = _srt_time_to_milliseconds(sub.start)
        end_time = _srt_time_to_milliseconds(sub.end)

        # SAMI is HTML-like, so line breaks inside a caption become <br>.
        text = sub.text.replace("\n", "<br>")
        parts.append(f"<SYNC Start={start_time}><P Class={lang_str}>\n{text}\n")
        # A blank entry at the end time hides the caption again.
        parts.append(f"<SYNC Start={end_time}><P Class={lang_str}>&nbsp;\n")

    parts.append("</BODY>\n</SAMI>")

    # Write the SMI file
    smi_file = change_file_extension(srt_file_path, "smi")
    with open(smi_file, "w", encoding="utf-8") as f:
        f.write("".join(parts))

    print("자막 변환", smi_file)


def _srt_time_to_milliseconds(srt_time):
    """Return the hours/minutes/seconds/milliseconds of ``srt_time`` as total ms."""
    return (
        srt_time.hours * 3600000
        + srt_time.minutes * 60000
        + srt_time.seconds * 1000
        + srt_time.milliseconds
    )


# -----------------------------------------------------------------------------
def main():
    """Parse command-line arguments and run the SRT-to-SMI conversion.

    Expects a positional ``FILE`` argument (the SRT file path) and an
    optional ``-l/--lang`` option restricted to ``ko`` or ``en``
    (default ``ko``), then calls ``convert_srt_to_smi`` with both.
    """
    parser = argparse.ArgumentParser(
        description="Convert SRT file to SMI file.",
        usage="%(prog)s <file_path>  [-l|--lang <language>]",
    )

    parser.add_argument("file_path", metavar="FILE", type=str, help="The target file")
    parser.add_argument(
        "-l",
        "--lang",
        type=str,
        default="ko",
        choices=["ko", "en"],
        help="자막 언어, 한국어(ko), 영어(en)",
    )
    args = parser.parse_args()

    target_file = args.file_path
    # Previously this read args.file_path, so the --lang option was ignored.
    target_lang = args.lang

    convert_srt_to_smi(target_file, target_lang)


if __name__ == "__main__":
    main()

//

반응형
Posted by codens