initial commit

2023-08-20 11:26:15 +02:00 · 2023-08-20 11:26:15 +02:00 · be779f0aca
commit be779f0aca
8 changed files with 5051 additions and 0 deletions
--- a/nlp_interview_transcription/init.py
+++ b/nlp_interview_transcription/init.py
--- a/nlp_interview_transcription/model.py
+++ b/nlp_interview_transcription/model.py
@@ -0,0 +1,18 @@
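+# SECURITY: a Hugging Face access token was committed on this line and has been redacted; revoke it and load the token from the environment instead.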
+
+# Force UTF-8 as the preferred encoding; the stub keeps the optional do_setlocale parameter so calls like getpreferredencoding(False) do not raise TypeError.
+import locale
+locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"
+
+# Build the padded clip: leading silence followed by the original recording.
+from pydub import AudioSegment
+
+# Duration of the leading silence, in milliseconds.
+spacermilli = 2000
+spacer = AudioSegment.silent(duration=spacermilli)
+
+# "+" between two segments is append() with crossfade=0, i.e. a hard join.
+audio = AudioSegment.from_wav("input.wav")
+audio = spacer + audio
+
+audio.export('input_prep.wav', format='wav')
--- a/nlp_interview_transcription/notebook.py
+++ b/nlp_interview_transcription/notebook.py
@@ -0,0 +1,24 @@
+## SETTINGS FOR LATER - input/output settings for the transcription run; the "# @param" / "# @markdown" comments look like notebook form annotations (presumably Google Colab - TODO confirm), so keep their syntax intact.
+from pathlib import Path
+
+# @markdown Enter the URL of the YouTube video, or the path to the video/audio file you want to transcribe, give the output path, etc. and run the cell. HTML file embeds the video for YouTube, and audio for media files.
+
+Source = "Youtube"  # @param ['Youtube', 'File (Google Drive)']
+# @markdown ---
+# @markdown #### **Youtube video**
+video_url = "https://youtu.be/hpZFJctBUHQ"  # @param {type:"string"}
+# store_audio = True #@param {type:"boolean"}
+# @markdown ---
+# @markdown #### **Google Drive video or audio path (mp4, wav, mp3)**
+video_path = "/content/drive/MyDrive/Customer_Service.mp3"  # @param {type:"string"}
+# @markdown ---
+output_path = "/content/transcript/"  # @param {type:"string"}
+output_path = str(Path(output_path))  # normalise via pathlib (e.g. removes the trailing slash)
+# @markdown ---
+# @markdown #### **Title for transcription of media file**
+audio_title = "Sample Order Taking"  # @param {type:"string"}
+# @markdown ---
+# @markdown #### Copy a token from your [Hugging Face tokens page](https://huggingface.co/settings/tokens) and paste it below.
+access_token = "hf_"  # @param {type:"string"}
+# @markdown ---
+# @markdown **Run this cell again if you change the video.**