
Commit cf8bd83

add audioqna asr wer eval scripts (#117)
* add wer eval scripts

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent cff0a36 commit cf8bd83

File tree

4 files changed: +147 −0 lines changed


examples/AudioQnA/README.md

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
# AudioQnA Accuracy Evaluation

## Dataset

We evaluate ASR accuracy on the test set of the LibriSpeech [dataset](https://huggingface.co/datasets/andreagasparini/librispeech_test_only), which contains 2620 audio recordings paired with reference transcripts.
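For a quick look at what the evaluation scripts consume, here is a minimal sketch that loads and inspects the dataset (the same `load_dataset` call and field names the scripts below rely on):

```python
from datasets import load_dataset

# "clean" config, test split: 2620 examples of audio plus reference text
librispeech_test_clean = load_dataset(
    "andreagasparini/librispeech_test_only", "clean", split="test", trust_remote_code=True
)
print(len(librispeech_test_clean))                          # number of records
print(librispeech_test_clean[0]["text"])                    # reference transcript
print(librispeech_test_clean[0]["audio"]["sampling_rate"])  # decoded audio metadata
```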
## Metrics

We report the WER (Word Error Rate) of the ASR microservice, i.e. the number of word substitutions, insertions, and deletions divided by the number of words in the reference transcripts.
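WER is computed with the Hugging Face `evaluate` package, using the same `load("wer")` call as the evaluation scripts below; a toy example:

```python
from evaluate import load

wer = load("wer")  # backed by jiwer, which is listed in requirements.txt

references = ["the cat sat on the mat"]
predictions = ["the cat sit on the mat"]

# One substituted word out of six reference words -> roughly 16.7 when reported as a percentage.
print(100 * wer.compute(references=references, predictions=predictions))
```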
## Evaluation

### Launch ASR microservice

Launch the ASR microservice with the following commands. For more details, please refer to the [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr).

```bash
git clone https://github.com/opea-project/GenAIComps
cd GenAIComps
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
# change the model to evaluate by editing --model_name_or_path
docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper:latest --model_name_or_path "openai/whisper-tiny"
```
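Before running the full evaluation, you can optionally sanity-check the service with a single request. This is a minimal sketch that mirrors the request/response format used by `online_eval.py`; the `sample.wav` path is a placeholder for any local WAV file:

```python
import base64
import json

import requests

# Placeholder: point this at any local WAV file.
with open("sample.wav", "rb") as f:
    audio_b64 = base64.b64encode(f.read()).decode("utf-8")

# Same endpoint and payload shape as online_eval.py.
response = requests.post(
    "http://localhost:7066/v1/asr",
    data=json.dumps({"audio": audio_b64}),
    proxies={"http": None},
)
print(response.json()["asr_result"])
```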
### Evaluate

Install dependencies:

```bash
pip install -r requirements.txt
```

Run the evaluation scripts:

```bash
# validate the model locally (offline)
# python local_eval.py
# validate the accuracy of the online ASR microservice
python online_eval.py
```
### Performance Result

Here are the tested results for your reference:

| Model | WER (%) |
| --- | ---- |
| whisper-large-v2 | 2.87 |
| whisper-large | 2.7 |
| whisper-medium | 3.45 |

examples/AudioQnA/local_eval.py

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import torch
5+
from datasets import load_dataset
6+
from evaluate import load
7+
from transformers import WhisperForConditionalGeneration, WhisperProcessor
8+
9+
device = "cuda" if torch.cuda.is_available() else "cpu"
10+
11+
MODEL_NAME = "openai/whisper-large-v2"
12+
13+
librispeech_test_clean = load_dataset(
14+
"andreagasparini/librispeech_test_only", "clean", split="test", trust_remote_code=True
15+
)
16+
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
17+
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)
18+
19+
20+
def map_to_pred(batch):
21+
audio = batch["audio"]
22+
input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features
23+
batch["reference"] = processor.tokenizer._normalize(batch["text"])
24+
25+
with torch.no_grad():
26+
predicted_ids = model.generate(input_features.to(device))[0]
27+
transcription = processor.decode(predicted_ids)
28+
batch["prediction"] = processor.tokenizer._normalize(transcription)
29+
return batch
30+
31+
32+
result = librispeech_test_clean.map(map_to_pred)
33+
34+
wer = load("wer")
35+
print(100 * wer.compute(references=result["reference"], predictions=result["prediction"]))

examples/AudioQnA/online_eval.py

Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import base64
5+
import json
6+
7+
import requests
8+
import torch
9+
from datasets import load_dataset
10+
from evaluate import load
11+
from pydub import AudioSegment
12+
from transformers import WhisperForConditionalGeneration, WhisperProcessor
13+
14+
MODEL_NAME = "openai/whisper-large-v2"
15+
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
16+
17+
librispeech_test_clean = load_dataset(
18+
"andreagasparini/librispeech_test_only", "clean", split="test", trust_remote_code=True
19+
)
20+
21+
22+
def map_to_pred(batch):
23+
batch["reference"] = processor.tokenizer._normalize(batch["text"])
24+
25+
file_path = batch["file"]
26+
# process the file_path
27+
pidx = file_path.rfind("/")
28+
sidx = file_path.rfind(".")
29+
30+
file_path_prefix = file_path[: pidx + 1]
31+
file_path_suffix = file_path[sidx:]
32+
file_path_mid = file_path[pidx + 1 : sidx]
33+
splits = file_path_mid.split("-")
34+
file_path_mid = f"LibriSpeech/test-clean/{splits[0]}/{splits[1]}/{file_path_mid}"
35+
36+
file_path = file_path_prefix + file_path_mid + file_path_suffix
37+
38+
audio = AudioSegment.from_file(file_path)
39+
audio.export("tmp.wav")
40+
with open("tmp.wav", "rb") as f:
41+
test_audio_base64_str = base64.b64encode(f.read()).decode("utf-8")
42+
43+
inputs = {"audio": test_audio_base64_str}
44+
endpoint = "http://localhost:7066/v1/asr"
45+
response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None})
46+
47+
result_str = response.json()["asr_result"]
48+
49+
batch["prediction"] = processor.tokenizer._normalize(result_str)
50+
return batch
51+
52+
53+
result = librispeech_test_clean.map(map_to_pred)
54+
55+
wer = load("wer")
56+
print(100 * wer.compute(references=result["reference"], predictions=result["prediction"]))

examples/AudioQnA/requirements.txt

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
datasets
evaluate
jiwer
librosa
pydub
soundfile
torch
transformers
