-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtranscript
More file actions
93 lines (80 loc) · 2.61 KB
/
transcript
File metadata and controls
93 lines (80 loc) · 2.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/env bash
# transcript — extract audio with ffmpeg, transcribe with OpenAI Whisper API.
#
# Usage:
# transcript <input> [-f txt|srt|vtt|json|verbose_json] [-l <lang>] [-o <out>] [-k]
#
# Env:
# OPENAI_API_KEY required
#
# Notes:
# - Encodes to 16 kHz mono MP3 @ 32 kbps so uploads stay small.
# - If the encoded audio exceeds 25 MB (the API upload limit), it is split
#   into 10-minute chunks that are transcribed in order and appended to the
#   output file.
set -euo pipefail
# Report a fatal error on stderr (prefixed with "error: ") and terminate the
# script with exit status 1. All arguments are joined into one message.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}
# Parse the command line into the globals used by the rest of the script.
#
# Globals written:
#   input - path of the media file (first positional argument, required)
#   fmt   - requested output format (default: text)
#   lang  - language hint for the API (default: empty, i.e. not sent)
#   out   - output path (default: empty, i.e. derived from the input name)
#   keep  - 1 to keep the temporary MP3 on exit, 0 to delete it
# Arguments: the script's own "$@"
# Exits via die() when the input is missing, an option is unknown, or an
# option that takes a value is given without one.
parse_args() {
  [[ $# -ge 1 ]] || die "usage: transcript <input> [-f fmt] [-l lang] [-o out] [-k]"
  input=$1
  shift
  fmt=text
  lang=
  out=
  keep=0
  while [[ $# -gt 0 ]]; do
    case $1 in
      -f|--format|-l|--lang|-o|--output)
        # Under `set -u` a trailing "-f" would otherwise abort with bash's
        # "$2: unbound variable" rather than a message the user can act on.
        [[ $# -ge 2 ]] || die "option $1 requires a value"
        case $1 in
          -f|--format) fmt=$2 ;;
          -l|--lang)   lang=$2 ;;
          -o|--output) out=$2 ;;
        esac
        shift 2
        ;;
      -k|--keep)
        keep=1
        shift
        ;;
      *)
        die "unknown arg: $1"
        ;;
    esac
  done
}

parse_args "$@"
# Leave no positional parameters behind, exactly as in-line parsing with
# `shift` would.
set --
# Preconditions, checked in order: the input must be a regular file, the API
# key must be present in the environment, and ffmpeg and curl must be on PATH.
if [[ ! -f $input ]]; then
  die "file not found: $input"
fi
if [[ -z ${OPENAI_API_KEY-} ]]; then
  die "OPENAI_API_KEY not set"
fi
if ! command -v ffmpeg >/dev/null; then
  die "ffmpeg not installed"
fi
if ! command -v curl >/dev/null; then
  die "curl not installed"
fi
# Print PATH without the extension of its final component.
#
# Only the file name is inspected, so a dot in a directory name is never
# mistaken for an extension ("./dir.v2/clip" stays "./dir.v2/clip"), and a
# dot-file with no further extension (".rec") is returned unchanged instead
# of collapsing to an empty name.
# Arguments: $1 - path to a file
# Outputs:   the extension-less path on stdout
default_out_base() {
  local path=$1
  local dir='' name stem
  name=${path##*/}
  if [[ $path == */* ]]; then
    dir=${path%/*}/
  fi
  stem=${name%.*}
  if [[ -z $stem ]]; then
    stem=$name
  fi
  printf '%s\n' "${dir}${stem}"
}

# Map the user-facing format name to the API's response_format value and to
# the extension of the default output file (the API says 'text'; the CLI also
# accepts 'txt').
case $fmt in
  txt|text)     api_fmt=text;         ext=txt ;;
  srt)          api_fmt=srt;          ext=srt ;;
  vtt)          api_fmt=vtt;          ext=vtt ;;
  json)         api_fmt=json;         ext=json ;;
  verbose_json) api_fmt=verbose_json; ext=json ;;
  *) die "unknown format: $fmt" ;;
esac

# Default output: next to the input, same name, format-specific extension.
base=$(default_out_base "$input")
out=${out:-${base}.${ext}}
# Temporary MP3 that is later uploaded (or split). It is deleted when the
# script exits unless -k/--keep was given.
tmp_mp3=$(mktemp --suffix=.mp3) || die "cannot create temporary file"
trap '[[ $keep -eq 0 ]] && rm -f "$tmp_mp3"' EXIT

echo ">> encoding audio (16 kHz mono, 32 kbps mp3)..." >&2
# -nostdin: ffmpeg otherwise treats stdin as an interactive console and can
# swallow input that belongs to whoever invoked this script (e.g. a
# `while read` loop feeding it file names).
ffmpeg -nostdin -y -loglevel error -i "$input" \
  -vn -ac 1 -ar 16000 -c:a libmp3lame -b:a 32k \
  "$tmp_mp3"

# The temp file has a random name; -k is only useful if we say where it is.
if [[ $keep -ne 0 ]]; then
  echo ">> keeping encoded audio: $tmp_mp3" >&2
fi

# Encoded size in bytes, and the threshold above which the audio is split.
size=$(stat -c%s "$tmp_mp3")
limit=$((25 * 1024 * 1024))
#######################################
# Upload one audio file to the transcription endpoint and print the result.
# Globals:
#   OPENAI_API_KEY (read) - bearer token
#   api_fmt        (read) - value sent as response_format
#   lang           (read) - optional language hint; omitted when empty
# Arguments:
#   $1 - path of the audio file to upload
# Outputs:
#   The response body on stdout, only when the server answered with a 2xx
#   status. On an HTTP error the body is printed to stderr instead, so an
#   error document can never end up in the transcript file.
# Returns:
#   0 on success, curl's exit status on a transport failure, 1 on a non-2xx
#   HTTP status.
#######################################
transcribe_one() {
  local file=$1
  local body http_code rc
  body=$(mktemp) || return 1

  # The Authorization header is fed through stdin (-H @-, curl >= 7.55) so
  # the API key does not appear in the process list.
  local args=(-sS -X POST https://api.openai.com/v1/audio/transcriptions
    -H "@-"
    -F "file=@${file}"
    -F "model=whisper-1"
    -F "response_format=${api_fmt}"
    -o "$body"
    -w '%{http_code}')
  if [[ -n $lang ]]; then
    args+=(-F "language=${lang}")
  fi

  # With -o the body goes to the temp file; stdout carries only the status.
  http_code=$(curl "${args[@]}" <<<"Authorization: Bearer ${OPENAI_API_KEY}") || {
    rc=$?
    rm -f "$body"
    return "$rc"
  }

  if [[ $http_code != 2* ]]; then
    echo "error: transcription request failed (HTTP ${http_code}):" >&2
    cat "$body" >&2
    echo >&2
    rm -f "$body"
    return 1
  fi

  cat "$body"
  rm -f "$body"
}
# Send the encoded audio to the API: in a single request when it fits under
# the size limit, otherwise as 10-minute chunks whose responses are appended
# to the output file in order.
if (( size <= limit )); then
  echo ">> transcribing ($(numfmt --to=iec "$size"))..." >&2
  transcribe_one "$tmp_mp3" > "$out"
else
  echo ">> audio > 25MB, splitting into chunks..." >&2
  chunk_dir=$(mktemp -d) || die "cannot create temporary directory"
  # Replaces the earlier EXIT trap so the chunk directory is cleaned up too.
  trap '[[ $keep -eq 0 ]] && rm -rf "$tmp_mp3" "$chunk_dir"' EXIT

  # Each chunk's response is appended unmodified. For anything but plain text
  # that means timestamps restart with every chunk and the file holds several
  # documents back to back, so tell the user instead of failing silently.
  if [[ $api_fmt != text ]]; then
    echo "warning: ${api_fmt} output of split audio is concatenated per chunk;" \
      "timestamps restart every 10 minutes and the file is not a single ${api_fmt} document" >&2
  fi

  # 10-minute chunks @ 32kbps ≈ 2.4 MB each, safely under limit
  # -nostdin keeps ffmpeg from consuming the caller's stdin.
  ffmpeg -nostdin -y -loglevel error -i "$tmp_mp3" -f segment -segment_time 600 \
    -c copy "$chunk_dir/chunk_%03d.mp3"

  # An unmatched glob stays literal; catch that before trying to upload it.
  chunks=("$chunk_dir"/chunk_*.mp3)
  [[ -e ${chunks[0]} ]] || die "no audio chunks were produced in $chunk_dir"

  : > "$out"
  for c in "${chunks[@]}"; do
    echo ">> chunk $(basename "$c")" >&2
    transcribe_one "$c" >> "$out"
    # Separate the plain-text chunks with an extra newline.
    if [[ $api_fmt == text ]]; then
      echo "" >> "$out"
    fi
  done
fi
echo ">> done: $out" >&2