diarize_worker.py 821 B

123456789101112131415161718192021222324252627282930313233
  #!/home/superti/miniconda3/envs/diarization/bin/python
  # The blanket warning filter is installed first so it is already active
  # while the remaining modules are imported.
  import warnings
  warnings.filterwarnings("ignore")

  import json
  import os
  import sys

  import dotenv
  from pyannote.audio import Pipeline
  from torch import device as torch_device

  # Runs at import time, before diarize() looks up HF_TOKEN in os.environ.
  dotenv.load_dotenv()
  def diarize(file):
      """Run speaker diarization on ``file`` and print the result as JSON.

      Loads the "pyannote/speaker-diarization-3.1" pipeline using the token
      in the ``HF_TOKEN`` environment variable, moves it to the CUDA device,
      runs it on ``file`` and prints one JSON array to stdout. Each element
      is an object with the keys "start", "end" and "speaker".

      Args:
          file: Audio input handed straight to the pipeline call
              (the script entry point passes a command-line path).

      Raises:
          KeyError: If ``HF_TOKEN`` is not set in the environment.
          RuntimeError: If no pipeline object could be obtained.
      """
      pipeline = Pipeline.from_pretrained(
          "pyannote/speaker-diarization-3.1",
          token=os.environ["HF_TOKEN"],
      )
      # Validate before touching the object: calling .to() on a missing
      # pipeline would fail with AttributeError and hide the real problem.
      # NOTE(review): assumes from_pretrained signals failure by returning
      # None rather than raising -- confirm for the installed version.
      if pipeline is None:
          raise RuntimeError("Pipeline not found")
      pipeline.to(torch_device("cuda"))

      output = pipeline(file)
      turns = [
          {"start": turn.start, "end": turn.end, "speaker": speaker}
          for turn, speaker in output.speaker_diarization
      ]
      print(json.dumps(turns))
  if __name__ == "__main__":
      # Fail with a readable usage line instead of an IndexError traceback
      # when the audio path is missing. Extra arguments are still ignored.
      if len(sys.argv) < 2:
          sys.exit(f"usage: {sys.argv[0]} AUDIO_FILE")
      diarize(sys.argv[1])