diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/Subtitle-Translator.iml b/.idea/Subtitle-Translator.iml
new file mode 100644
index 0000000..f639c26
--- /dev/null
+++ b/.idea/Subtitle-Translator.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..9875417
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..ca1f174
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/st.py b/st.py
new file mode 100755
index 0000000..651d265
--- /dev/null
+++ b/st.py
@@ -0,0 +1,133 @@
+#!/usr/local/lib/pytorch-venv/bin/python3
+"""Translate every .srt file in ~/Documents/Subtitles with a MADLAD-400 model."""
+import os
+
+import chardet
+import ctranslate2
+import pysrt
+import transformers
+from huggingface_hub import snapshot_download
+
+MODEL_REPO = "zenoverflow/madlad400-10b-mt-ct2-int8-float16"
+DEFAULT_TARGET_LANG = "fa"
+
+# Optional module-wide default used by Subtitle when no translator is passed in.
+translator = None
+
+
+class Translator:
+    """Text translator backed by a CTranslate2 model loaded on CUDA."""
+
+    def __init__(self):
+        """Fetch the model snapshot and load the translator and tokenizer."""
+        model_path = snapshot_download(MODEL_REPO)
+        # Start subsequent output on a fresh line.
+        print("\n", end="")
+        self.translator = ctranslate2.Translator(model_path, device="cuda")
+        self.tokenizer = transformers.T5Tokenizer.from_pretrained(model_path)
+
+    def translate(self, text, target_lang_code=DEFAULT_TARGET_LANG):
+        """Translate ``text`` and return the first hypothesis as a string.
+
+        Args:
+            text: Text to translate.
+            target_lang_code: Code inserted into the ``<2xx>`` prefix that is
+                prepended to the input; defaults to ``"fa"``.
+        """
+        input_text = f"<2{target_lang_code}> {text}"
+        input_tokens = self.tokenizer.convert_ids_to_tokens(
+            self.tokenizer.encode(input_text)
+        )
+        results = self.translator.translate_batch([input_tokens])
+        output_tokens = results[0].hypotheses[0]
+        return self.tokenizer.decode(
+            self.tokenizer.convert_tokens_to_ids(output_tokens)
+        )
+
+
+class Subtitle:
+    """A single .srt file that can be translated cue by cue."""
+
+    def __init__(self, sub_dir, filename, translator=None):
+        """Store the file location and the translator to use.
+
+        Args:
+            sub_dir: Directory that contains the subtitle file.
+            filename: Name of the subtitle file inside ``sub_dir``.
+            translator: Object exposing ``translate(text)``. When omitted,
+                the module-level ``translator`` is used at translation time.
+        """
+        self.sub_dir = sub_dir
+        self.filename = filename
+        self.file_path = os.path.join(self.sub_dir, self.filename)
+        self.translator = translator
+
+    def translate_subtitle(self, encoding='UTF-8'):
+        """Translate every cue and save the file under ``<sub_dir>/translated``.
+
+        Args:
+            encoding: Encoding used to read the source file; the translated
+                file is always written as UTF-8.
+
+        Returns:
+            Path of the translated subtitle file.
+        """
+        subtitles = pysrt.open(self.file_path, encoding=encoding)
+        for sub in subtitles:
+            # Join multi-line cues so each cue is translated as one string.
+            subtitle_line = sub.text.replace('\n', ' ')
+            if not subtitle_line.strip():
+                # Nothing to translate; leave the cue untouched.
+                continue
+            sub.text = self.translate_text(subtitle_line)
+        translated_dir = os.path.join(self.sub_dir, 'translated')
+        # exist_ok avoids the check-then-create race of os.path.exists().
+        os.makedirs(translated_dir, exist_ok=True)
+        output_srt_file_path = os.path.join(translated_dir, self.filename)
+        subtitles.save(output_srt_file_path, encoding='UTF-8')
+        return output_srt_file_path
+
+    def translate_text(self, text):
+        """Translate ``text``, print the result, and return it.
+
+        Raises:
+            RuntimeError: If no translator was passed in and the module-level
+                ``translator`` is unset.
+        """
+        engine = self.translator if self.translator is not None else translator
+        if engine is None:
+            raise RuntimeError("No translator configured for Subtitle")
+        translated_text = engine.translate(text)
+        print(translated_text)
+        return translated_text
+
+
+def check_encoding(subtitle_file):
+    """Return chardet's guess for the encoding of ``subtitle_file``.
+
+    The result is ``None`` when chardet reports no encoding.
+    """
+    with open(subtitle_file, 'rb') as f:
+        rawdata = f.read()
+    return chardet.detect(rawdata)['encoding']
+
+
+def main():
+    """Translate every .srt file found in ~/Documents/Subtitles."""
+    directory = os.path.join(os.path.expanduser('~'), 'Documents', 'Subtitles')
+    srt_files = sorted(f for f in os.listdir(directory) if f.endswith('.srt'))
+    if not srt_files:
+        # Avoid loading the model when there is nothing to translate.
+        print("No translation")
+        return
+    engine = Translator()
+    for srt_file in srt_files:
+        subtitle = Subtitle(directory, srt_file, engine)
+        # Fall back to UTF-8 when chardet cannot name an encoding.
+        subtitle_encoding = check_encoding(subtitle.file_path) or 'UTF-8'
+        # translate_subtitle always returns the path of the file it wrote.
+        print(subtitle.translate_subtitle(subtitle_encoding))
+
+
+if __name__ == '__main__':
+    main()