#!/usr/local/lib/pytorch-venv/bin/python3
"""Batch-translate ``.srt`` subtitle files with a CTranslate2 translation model.

Run as a script, this translates every ``.srt`` file found in
``~/Documents/Subtitles`` and writes each result, under the same file name,
into a ``translated`` sub-directory of that folder.
"""

import os
from typing import Optional

# NOTE(review): nothing in this file reads this name (the ``encoding``
# parameter of ``Subtitle.translate_subtitle`` shadows it), so it looks like an
# accidental IDE auto-import. ``idlelib`` is the IDLE editor package and
# presumably drags in GUI dependencies -- confirm and remove.
from idlelib.iomenu import encoding  # noqa: F401

import chardet
import ctranslate2
import pysrt
import transformers
from huggingface_hub import snapshot_download


class Translator:
    """Load the translation model once and translate single strings with it."""

    MODEL_REPO = "zenoverflow/madlad400-10b-mt-ct2-int8-float16"

    def __init__(self, device="cuda"):
        """Download (or reuse the cached) model snapshot and load it.

        Args:
            device: Device name handed to ``ctranslate2.Translator``.
                Defaults to ``"cuda"``, the value this script always used.
        """
        model_path = snapshot_download(self.MODEL_REPO)
        # Presumably terminates the line left by the download progress output.
        print("\n", end="")
        self.translator = ctranslate2.Translator(model_path, device=device)
        self.tokenizer = transformers.T5Tokenizer.from_pretrained(model_path)

    def translate(self, text, target_lang_code="fa"):
        """Translate *text* into the language named by *target_lang_code*.

        Args:
            text: The source text to translate.
            target_lang_code: Language code placed in the ``<2xx>`` tag that is
                prefixed to the input. Defaults to ``"fa"``.

        Returns:
            The decoded text of the first hypothesis returned by the model.
        """
        input_text = f"<2{target_lang_code}> {text}"
        # translate_batch is fed token strings, so encode to ids and map the
        # ids back to their token strings.
        input_ids = self.tokenizer.encode(input_text)
        input_tokens = self.tokenizer.convert_ids_to_tokens(input_ids)

        results = self.translator.translate_batch([input_tokens])
        output_tokens = results[0].hypotheses[0]

        output_ids = self.tokenizer.convert_tokens_to_ids(output_tokens)
        return self.tokenizer.decode(output_ids)


class Subtitle:
    """One subtitle file inside a directory, plus the logic to translate it."""

    def __init__(self, sub_dir, filename, translator: Optional[Translator] = None):
        """Remember where the subtitle file lives and who translates it.

        Args:
            sub_dir: Directory that contains the subtitle file.
            filename: Name of the subtitle file inside *sub_dir*.
            translator: Object with a ``translate(text)`` method. When omitted,
                ``translate_text`` falls back to a module-level ``translator``
                variable, which is how this class was originally wired.
        """
        self.sub_dir = sub_dir
        self.filename = filename
        self.file_path = os.path.join(self.sub_dir, self.filename)
        self.translator = translator

    def translate_subtitle(self, encoding='UTF-8'):
        """Translate every cue of the file and save the result.

        The translated file is written as UTF-8 to ``<sub_dir>/translated/``
        under the original file name.

        Args:
            encoding: Encoding used to read the source file.

        Returns:
            Path of the saved, translated subtitle file.
        """
        subtitles = pysrt.open(self.file_path, encoding=encoding)
        for sub in subtitles:
            # A cue may span several lines; join them so the model sees one
            # sentence instead of fragments.
            subtitle_line = sub.text.replace('\n', ' ')
            if not subtitle_line.strip():
                # Nothing to translate; do not feed an empty prompt to the model.
                continue
            sub.text = self.translate_text(subtitle_line)

        translated_dir = os.path.join(self.sub_dir, 'translated')
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(translated_dir, exist_ok=True)

        output_srt_file_path = os.path.join(translated_dir, self.filename)
        subtitles.save(output_srt_file_path, encoding='UTF-8')
        return output_srt_file_path

    def translate_text(self, text):
        """Translate *text*, echo the translation to stdout, and return it.

        Raises:
            NameError: If no translator was passed to the constructor and no
                module-level ``translator`` variable exists.
        """
        engine = self.translator
        if engine is None:
            # Backward compatibility: older callers set a module-level
            # ``translator`` instead of passing one in.
            engine = globals().get('translator')
            if engine is None:
                raise NameError(
                    "no translator available: pass translator=... to Subtitle() "
                    "or define a module-level 'translator'"
                )
        translated_text = engine.translate(text)
        print(translated_text)
        return translated_text


def check_encoding(subtitle_file) -> Optional[str]:
    """Return the encoding ``chardet`` detects for *subtitle_file*.

    The whole file is read as bytes and passed to ``chardet.detect``; the
    ``'encoding'`` entry of its result is returned unchanged, so the value may
    be ``None`` when chardet reports no encoding.
    """
    with open(subtitle_file, 'rb') as f:
        rawdata = f.read()
    return chardet.detect(rawdata)['encoding']


def main():
    """Translate every ``.srt`` file in ``~/Documents/Subtitles``."""
    home_dir = os.path.expanduser('~')
    directory = os.path.join(home_dir, 'Documents', 'Subtitles')

    # Match the extension case-insensitively so ``.SRT`` files are not skipped.
    srt_files = sorted(
        f for f in os.listdir(directory) if f.lower().endswith('.srt')
    )
    if not srt_files:
        # Avoid loading the model when there is nothing to translate.
        return

    translator = Translator()
    for srt_file in srt_files:
        subtitle = Subtitle(directory, srt_file, translator=translator)
        # Fall back to translate_subtitle's own default when detection fails.
        subtitle_encoding = check_encoding(subtitle.file_path) or 'UTF-8'
        translated_file = subtitle.translate_subtitle(subtitle_encoding)
        if translated_file:
            print(translated_file)
        else:
            print("No translation")


if __name__ == '__main__':
    main()