Skip to content
This repository was archived by the owner on Sep 18, 2025. It is now read-only.
Draft
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions .circleci/config.yml
Comment thread
fissoreg marked this conversation as resolved.
Outdated
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Builds the source and wheel distributions and uploads them to TestPyPI
# on every push to the main branch.
# NOTE(review): the diff lists this file as .circleci/config.yml, but the
# syntax below is GitHub Actions workflow syntax — confirm the intended
# location (GitHub only picks up workflows under .github/workflows/).
name: TestPyPI Publish

on:
  push:
    branches:
      - main

jobs:
  test_pypi_publish:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the value is always read as a string.
          python-version: "3.x"

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y python3-pip
          pip install pipenv
          pipenv install twine

      - name: Build and Publish
        env:
          # TestPyPI API tokens are used with the literal user name __token__.
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }}
          REPOSITORY_URL: https://test.pypi.org/legacy/
        run: |
          python setup.py sdist bdist_wheel
          pipenv run twine upload --repository-url "$REPOSITORY_URL" dist/*
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,7 @@ cython_debug/
.vscode

# Mac files
.DS_Store
.DS_Store

output
exp.ipynb
2 changes: 2 additions & 0 deletions examples/speech_to_text/output/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# NOTE(review): this file lives under examples/speech_to_text/, yet
# target_type is `speech` — confirm it should not be `text`.
source_type: speech
target_type: speech
80 changes: 80 additions & 0 deletions examples/speech_to_text/visualize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import os
import pandas as pd
import argparse
import pprint


def read_scores_from_folder(folder_path):
    """Return the non-blank lines of ``scores.tsv`` found in *folder_path*.

    Args:
        folder_path: Directory expected to contain a ``scores.tsv`` file.

    Returns:
        A list holding every non-blank line of the file with leading and
        trailing whitespace removed, or ``None`` when ``scores.tsv`` is not
        a regular file inside *folder_path*.
    """
    score_file_path = os.path.join(folder_path, "scores.tsv")
    if not os.path.isfile(score_file_path):
        return None

    with open(score_file_path, "r") as f:
        # Strip each line once, then drop the ones that end up empty.
        stripped_lines = (line.strip() for line in f)
        return [line for line in stripped_lines if line]


def read_scores_files(output_folder):
    """Collect the ``scores.tsv`` contents of every sub-folder of *output_folder*.

    Args:
        output_folder: Directory whose immediate sub-directories are scanned.

    Returns:
        A list with one entry per sub-directory that yielded a non-empty
        result from ``read_scores_from_folder``. Entries follow the sorted
        order of the sub-directory names.

    Raises:
        ValueError: If *output_folder* is not an existing directory.
    """
    if not os.path.isdir(output_folder):
        raise ValueError("Output folder does not exist")

    output_folder = os.path.abspath(output_folder)

    all_contents = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # result (and anything built from it) reproducible across runs.
    for folder in sorted(os.listdir(output_folder)):
        folder_path = os.path.join(output_folder, folder)
        if not os.path.isdir(folder_path):
            continue

        contents = read_scores_from_folder(folder_path)
        # Skips both a missing scores file (None) and an empty one ([]).
        if contents:
            all_contents.append(contents)
    return all_contents


def process_result(output_folder, metric_names):
    """Build a table of the requested metrics from the score files.

    Each score file contributes one row: its first line is split on
    whitespace into metric names and its second line into the matching
    values. Values are kept as the strings read from the file.

    Args:
        output_folder: Directory passed on to ``read_scores_files``.
        metric_names: Metric names to keep, or ``None`` to keep every metric
            of the first usable score file.

    Returns:
        A ``pandas.DataFrame`` with one row per usable score file, limited
        to the requested metrics that the first usable file provides. Cells
        for metrics a file lacks are filled with ``0.0``. When exactly one
        metric is selected, rows whose value equals ``0.0`` are dropped.

    Raises:
        ValueError: If no score file has a header, if no score file has a
            values line, or if none of the requested metrics is present.
    """
    all_contents = read_scores_files(output_folder)

    if not any(all_contents):
        raise ValueError("No headers found in the results")

    # A usable file needs a header line plus a values line; indexing the
    # second line of a header-only file would raise IndexError.
    scores = [
        dict(zip(contents[0].split(), contents[1].split()))
        for contents in all_contents
        if len(contents) >= 2
    ]
    if not scores:
        raise ValueError("No scores found in the results")

    # Metrics are matched against the first usable file only. Using the
    # keys of its row guarantees every selected metric is a real column.
    reference_header = list(scores[0])

    if metric_names is None:
        metric_names = reference_header
    common_metrics = set(metric_names).intersection(reference_header)

    if not common_metrics:
        raise ValueError("No common metrics found in the results")

    # Files that lack a metric produce NaN cells, replaced here by 0.0.
    df = pd.DataFrame(scores).fillna(0.0)

    # Indexing through df.columns keeps the original column order.
    filtered_df = df[df.columns[df.columns.isin(common_metrics)]]

    if len(common_metrics) == 1:
        (metric_name,) = common_metrics
        # Drops the rows that only carry the 0.0 placeholder for this metric.
        filtered_df = filtered_df[filtered_df[metric_name] != 0.0]

    return filtered_df


if __name__ == "__main__":
    # Command-line entry point: print the table of extracted metrics.
    parser = argparse.ArgumentParser()
    # Required: without a folder, None would reach the directory check and
    # fail with an unhelpful error instead of a usage message.
    parser.add_argument("--output", type=str, required=True, help="Output directory")
    parser.add_argument(
        "--metrics", type=str, nargs="+", default=None, help="Metrics to be extracted"
    )
    args = parser.parse_args()

    df = process_result(args.output, args.metrics)
    pprint.pprint(df)