mirror of https://github.com/Chocobozzz/PeerTube
Chocobozzz
7 months ago
172 changed files with 2666 additions and 937 deletions
@ -1,3 +1,3 @@
|
||||
export * from './common.js' |
||||
export * from './process-vod.js' |
||||
export * from './transcoding-logger.js' |
||||
export * from './winston-logger.js' |
||||
|
@ -0,0 +1,79 @@
|
||||
import { hasAudioStream } from '@peertube/peertube-ffmpeg' |
||||
import { RunnerJobTranscriptionPayload, TranscriptionSuccess } from '@peertube/peertube-models' |
||||
import { buildSUUID } from '@peertube/peertube-node-utils' |
||||
import { TranscriptionModel, WhisperBuiltinModel, transcriberFactory } from '@peertube/peertube-transcription' |
||||
import { remove } from 'fs-extra/esm' |
||||
import { join } from 'path' |
||||
import { ConfigManager } from '../../../shared/config-manager.js' |
||||
import { logger } from '../../../shared/index.js' |
||||
import { ProcessOptions, downloadInputFile, scheduleTranscodingProgress } from './common.js' |
||||
import { getWinstonLogger } from './winston-logger.js' |
||||
|
||||
/**
 * Process a video transcription runner job.
 *
 * Downloads the input file of the job, checks that it contains an audio stream,
 * runs the configured transcription engine to produce a VTT transcript and reports
 * the result to the server. If the input has no audio stream, an error is reported
 * on the job instead and nothing is transcribed.
 *
 * The downloaded input, the transcript output path and the progress timer are always
 * cleaned up, whether the job succeeds or throws.
 *
 * @param options - server, job and runner token of the job to process
 */
export async function processVideoTranscription (options: ProcessOptions<RunnerJobTranscriptionPayload>) {
  const { server, job, runnerToken } = options

  const config = ConfigManager.Instance.getConfig().transcription
  const payload = job.payload

  // Transcript output path of this job, named with a freshly built SUUID
  const outputPath = join(ConfigManager.Instance.getTranscriptionDirectory(), buildSUUID())

  let inputPath: string | undefined

  // Scheduled right before the try block so that the finally clause always clears it
  const updateProgressInterval = scheduleTranscodingProgress({
    job,
    server,
    runnerToken,
    // The getter never provides a progress value for transcription jobs
    progressGetter: () => undefined
  })

  try {
    // Built inside the try block: if the factory throws, the progress timer is still cleared
    const transcriber = transcriberFactory.createFromEngineName({
      engineName: config.engine,
      enginePath: config.enginePath,
      logger: getWinstonLogger()
    })

    logger.info(`Downloading input file ${payload.input.videoFileUrl} for transcription job ${job.jobToken}`)

    inputPath = await downloadInputFile({ url: payload.input.videoFileUrl, runnerToken, job })

    logger.info(`Downloaded input file ${payload.input.videoFileUrl} for job ${job.jobToken}. Running transcription.`)

    if (await hasAudioStream(inputPath) !== true) {
      await server.runnerJobs.error({
        jobToken: job.jobToken,
        jobUUID: job.uuid,
        runnerToken,
        message: 'This input file does not contain audio'
      })

      return
    }

    const transcriptFile = await transcriber.transcribe({
      mediaFilePath: inputPath,
      // A model path from the configuration takes precedence over the builtin model name
      model: config.modelPath
        ? await TranscriptionModel.fromPath(config.modelPath)
        : new WhisperBuiltinModel(config.model),
      format: 'vtt',
      transcriptDirectory: outputPath
    })

    const successBody: TranscriptionSuccess = {
      inputLanguage: transcriptFile.language,
      vttFile: transcriptFile.path
    }

    await server.runnerJobs.success({
      jobToken: job.jobToken,
      jobUUID: job.uuid,
      runnerToken,
      payload: successBody
    })
  } finally {
    // Stop the progress timer first: it must not keep running if a file removal fails
    if (updateProgressInterval) clearInterval(updateProgressInterval)

    try {
      if (inputPath) await remove(inputPath)
    } finally {
      // Still attempt to remove the output even if removing the input failed
      await remove(outputPath)
    }
  }
}
@ -1,19 +0,0 @@
|
||||
import { LogFn } from 'pino' |
||||
import { logger } from '../../../shared/index.js' |
||||
|
||||
/**
 * Expose the shared logger through `info`, `debug`, `warn` and `error` functions
 * accepting a message first and an optional object second.
 */
export function getTranscodingLogger () {
  const info = buildWinstonLogger(logger.info.bind(logger))
  const debug = buildWinstonLogger(logger.debug.bind(logger))
  const warn = buildWinstonLogger(logger.warn.bind(logger))
  const error = buildWinstonLogger(logger.error.bind(logger))

  return { info, debug, warn, error }
}
||||
|
||||
/**
 * Wrap a log function so it can be called with the message first and an optional object second.
 * When the object is provided, it is forwarded as the first argument of `log`, before the message.
 */
function buildWinstonLogger (log: LogFn) {
  return (message: string, meta?: object) => {
    return meta
      ? log(meta, message)
      : log(message)
  }
}
@ -0,0 +1,19 @@
|
||||
import { LogFn } from 'pino' |
||||
import { logger } from '../../../shared/index.js' |
||||
|
||||
/**
 * Expose the shared logger through `info`, `debug`, `warn` and `error` functions
 * accepting a message first and an optional object second.
 */
export function getWinstonLogger () {
  const info = buildLogLevelFn(logger.info.bind(logger))
  const debug = buildLogLevelFn(logger.debug.bind(logger))
  const warn = buildLogLevelFn(logger.warn.bind(logger))
  const error = buildLogLevelFn(logger.error.bind(logger))

  return { info, debug, warn, error }
}
||||
|
||||
/**
 * Wrap a log function so it can be called with the message first and an optional object second.
 * When the object is provided, it is forwarded as the first argument of `log`, before the message.
 */
function buildLogLevelFn (log: LogFn) {
  return (message: string, meta?: object) => {
    return meta
      ? log(meta, message)
      : log(message)
  }
}
@ -1,37 +0,0 @@
|
||||
JiWER |
||||
===== |
||||
Node.js wrapper around the __JiWER__ CLI. |
||||
|
||||
> *JiWER is a python tool for computing the word-error-rate of ASR systems.* |
||||
> https://jitsi.github.io/jiwer/cli/ |
||||
|
||||
__JiWER__ serves as a reference implementation to calculate error rates between 2 text files: |
||||
- WER (Word Error Rate) |
||||
- CER (Character Error Rate) |
||||
|
||||
Build |
||||
----- |
||||
|
||||
```sh |
||||
npm run build |
||||
``` |
||||
|
||||
Usage |
||||
----- |
||||
```typescript |
||||
const jiwerCLI = new JiwerClI('./reference.txt', './hypothesis.txt') |
||||
|
||||
// WER as a percentage, ex: 0.03 -> 3% |
||||
console.log(await jiwerCLI.wer()) |
||||
|
||||
// CER as a percentage: 0.01 -> 1% |
||||
console.log(await jiwerCLI.cer()) |
||||
|
||||
// Detailed comparison report |
||||
console.log(await jiwerCLI.alignment()) |
||||
``` |
||||
|
||||
Resources |
||||
--------- |
||||
- https://jitsi.github.io/jiwer/ |
||||
- https://github.com/rapidfuzz/RapidFuzz |
@ -1,2 +1,3 @@
|
||||
export * from './file-storage.enum.js' |
||||
export * from './result-list.model.js' |
||||
export * from './simple-logger.model.js' |
||||
|
@ -0,0 +1,6 @@
|
||||
/**
 * Minimal logger contract: one function per level, each taking a message
 * and an optional object.
 */
export type SimpleLogger = {
  [level in 'info' | 'debug' | 'warn' | 'error']: (msg: string, obj?: object) => void
}
@ -1,2 +1,3 @@
|
||||
export * from './video-caption.model.js' |
||||
export * from './video-caption-generate.model.js' |
||||
export * from './video-caption-update.model.js' |
||||
export * from './video-caption.model.js' |
||||
|
@ -0,0 +1,3 @@
|
||||
/** Body of a caption generation request. */
export interface VideoCaptionGenerate {
  /** Default false */
  forceTranscription?: boolean
}
Binary file not shown.
Binary file not shown.
@ -0,0 +1,2 @@
|
||||
whisper-ctranslate2 |
||||
openai-whisper |
@ -0,0 +1,106 @@
|
||||
/* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/require-await */ |
||||
|
||||
import { HttpStatusCode, UserRole } from '@peertube/peertube-models' |
||||
import { |
||||
PeerTubeServer, |
||||
cleanupTests, |
||||
createMultipleServers, |
||||
doubleFollow, |
||||
setAccessTokensToServers, |
||||
waitJobs |
||||
} from '@peertube/peertube-server-commands' |
||||
|
||||
// Checks the status codes returned by the caption generation endpoint (captions.runGenerate)
describe('Test video transcription API validator', function () {
  let servers: PeerTubeServer[]

  let userToken: string
  let anotherUserToken: string

  let remoteId: string
  let validId: string

  // ---------------------------------------------------------------

  before(async function () {
    this.timeout(240000)

    servers = await createMultipleServers(2)
    await setAccessTokensToServers(servers)

    await doubleFollow(servers[0], servers[1])

    userToken = await servers[0].users.generateUserAndToken('user', UserRole.USER)
    anotherUserToken = await servers[0].users.generateUserAndToken('user2', UserRole.USER)

    {
      // Uploaded on the second server, so it is a remote video for servers[0]
      const { uuid } = await servers[1].videos.quickUpload({ name: 'remote video' })
      remoteId = uuid
    }

    {
      // Uploaded by "user" on the first server
      const { uuid } = await servers[0].videos.quickUpload({ name: 'both 1', token: userToken })
      validId = uuid
    }

    await waitJobs(servers)

    await servers[0].config.enableTranscription()
  })

  it('Should not run transcription of an unknown video', async function () {
    await servers[0].captions.runGenerate({ videoId: 404, expectedStatus: HttpStatusCode.NOT_FOUND_404 })
  })

  it('Should not run transcription of a remote video', async function () {
    await servers[0].captions.runGenerate({ videoId: remoteId, expectedStatus: HttpStatusCode.BAD_REQUEST_400 })
  })

  // "user2" did not upload the video, so the request must be forbidden
  it('Should not run transcription by a non owner/moderator user', async function () {
    await servers[0].captions.runGenerate({ videoId: validId, token: anotherUserToken, expectedStatus: HttpStatusCode.FORBIDDEN_403 })
  })

  it('Should not run transcription if a caption file already exists', async function () {
    await servers[0].captions.add({
      language: 'en',
      videoId: validId,
      fixture: 'subtitle-good1.vtt'
    })

    await servers[0].captions.runGenerate({ videoId: validId, expectedStatus: HttpStatusCode.BAD_REQUEST_400 })

    // Remove the caption so the next tests start without any caption on the video
    await servers[0].captions.delete({ language: 'en', videoId: validId })
  })

  it('Should not run transcription if the instance disabled it', async function () {
    await servers[0].config.disableTranscription()

    await servers[0].captions.runGenerate({ videoId: validId, expectedStatus: HttpStatusCode.BAD_REQUEST_400 })

    // Re-enable transcription for the next tests
    await servers[0].config.enableTranscription()
  })

  it('Should succeed to run transcription', async function () {
    await servers[0].captions.runGenerate({ videoId: validId, token: userToken })
  })

  it('Should fail to run transcription twice', async function () {
    await servers[0].captions.runGenerate({ videoId: validId, token: userToken, expectedStatus: HttpStatusCode.CONFLICT_409 })
  })

  it('Should fail to run transcription twice with a non-admin user with the forceTranscription boolean', async function () {
    await servers[0].captions.runGenerate({
      videoId: validId,
      token: userToken,
      forceTranscription: true,
      expectedStatus: HttpStatusCode.FORBIDDEN_403
    })
  })

  it('Should succeed to run transcription twice with the forceTranscription boolean', async function () {
    await servers[0].captions.runGenerate({ videoId: validId, forceTranscription: true })
  })

  after(async function () {
    await cleanupTests(servers)
  })
})
@ -0,0 +1,81 @@
|
||||
/* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/require-await */ |
||||
|
||||
import { UserNotification } from '@peertube/peertube-models' |
||||
import { PeerTubeServer, cleanupTests, waitJobs } from '@peertube/peertube-server-commands' |
||||
import { MockSmtpServer } from '@tests/shared/mock-servers/mock-email.js' |
||||
import { |
||||
CheckerBaseParams, |
||||
checkMyVideoTranscriptionGenerated, |
||||
prepareNotificationsTest |
||||
} from '@tests/shared/notifications.js' |
||||
import { join } from 'path' |
||||
|
||||
// Checks the notification sent to a user when a transcription of their video has been generated
describe('Test caption notifications', function () {
  let servers: PeerTubeServer[] = []

  let userNotifications: UserNotification[] = []
  let emails: object[] = []
  let userAccessToken: string

  before(async function () {
    this.timeout(120000)

    const res = await prepareNotificationsTest(1)
    emails = res.emails
    userAccessToken = res.userAccessToken
    servers = res.servers
    userNotifications = res.userNotifications
  })

  describe('Transcription of my video generated is published', function () {
    const language = { id: 'en', label: 'English' }
    let baseParams: CheckerBaseParams

    before(() => {
      baseParams = {
        server: servers[0],
        emails,
        socketNotifications: userNotifications,
        token: userAccessToken
      }
    })

    // Upload the transcription fixture without a language, wait for the jobs and return the video
    async function uploadAndWait () {
      const { uuid } = await servers[0].videos.upload({
        token: userAccessToken,
        attributes: {
          name: 'video',
          fixture: join('transcription', 'videos', 'the_last_man_on_earth.mp4'),
          language: undefined
        }
      })
      await waitJobs(servers)

      return servers[0].videos.get({ id: uuid })
    }

    it('Should not send a notification if transcription is not enabled', async function () {
      this.timeout(50000)

      const { name, shortUUID } = await uploadAndWait()

      await checkMyVideoTranscriptionGenerated({ ...baseParams, videoName: name, shortUUID, language, checkType: 'absence' })
    })

    it('Should send a notification if transcription is enabled', async function () {
      this.timeout(240000)

      await servers[0].config.enableTranscription()

      const { name, shortUUID } = await uploadAndWait()

      await checkMyVideoTranscriptionGenerated({ ...baseParams, videoName: name, shortUUID, language, checkType: 'presence' })
    })
  })

  after(async function () {
    MockSmtpServer.Instance.kill()

    await cleanupTests(servers)
  })
})
@ -0,0 +1,109 @@
|
||||
/* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/require-await */ |
||||
|
||||
import { |
||||
RunnerJobTranscriptionPayload, |
||||
TranscriptionSuccess |
||||
} from '@peertube/peertube-models' |
||||
import { |
||||
PeerTubeServer, |
||||
cleanupTests, |
||||
createMultipleServers, |
||||
doubleFollow, |
||||
setAccessTokensToServers, |
||||
setDefaultVideoChannel, |
||||
waitJobs |
||||
} from '@peertube/peertube-server-commands' |
||||
import { checkPersistentTmpIsEmpty } from '@tests/shared/directories.js' |
||||
import { expect } from 'chai' |
||||
|
||||
// Plays the role of a remote runner by hand: requests, accepts and completes transcription jobs through the API
describe('Test runner transcription', function () {
  let servers: PeerTubeServer[] = []
  let runnerToken: string

  before(async function () {
    this.timeout(120_000)

    servers = await createMultipleServers(2)

    await setAccessTokensToServers(servers)
    await setDefaultVideoChannel(servers)

    await doubleFollow(servers[0], servers[1])

    await servers[0].config.enableTranscription({ remote: true })
    runnerToken = await servers[0].runners.autoRegisterRunner()
  })

  // Upload a video without language, then accept the single runner job that becomes available
  async function upload () {
    const { uuid } = await servers[0].videos.upload({ attributes: { name: 'video', language: undefined } })

    const { availableJobs } = await servers[0].runnerJobs.request({ runnerToken })
    expect(availableJobs).to.have.lengthOf(1)

    const jobUUID = availableJobs[0].uuid

    const { job } = await servers[0].runnerJobs.accept<RunnerJobTranscriptionPayload>({ runnerToken, jobUUID })
    return { uuid, job }
  }

  it('Should execute a remote transcription job', async function () {
    this.timeout(240_000)

    const { uuid, job } = await upload()

    // A bare `expect(condition)` asserts nothing: chain a real assertion
    expect(job.type).to.equal('video-transcription')
    expect(job.payload.input.videoFileUrl).to.exist

    // Check video input file
    {
      await servers[0].runnerJobs.getJobFile({ url: job.payload.input.videoFileUrl, jobToken: job.jobToken, runnerToken })
    }

    const payload: TranscriptionSuccess = {
      inputLanguage: 'ar',
      vttFile: 'subtitle-good1.vtt'
    }

    await servers[0].runnerJobs.success({ runnerToken, jobUUID: job.uuid, jobToken: job.jobToken, payload })

    await waitJobs(servers)

    for (const server of servers) {
      const video = await server.videos.get({ id: uuid })
      expect(video.language.id).to.equal('ar')

      // Counterpart of the "unknown inputLanguage" test below, which expects no caption at all
      const captions = await server.captions.list({ videoId: uuid })
      expect(captions.total).to.equal(1)
      expect(captions.data).to.have.lengthOf(1)
    }

    await checkPersistentTmpIsEmpty(servers[0])
  })

  it('Should not assign caption/language with an unknown inputLanguage', async function () {
    this.timeout(240_000)

    const { uuid, job } = await upload()

    const payload: TranscriptionSuccess = {
      inputLanguage: 'toto',
      vttFile: 'subtitle-good1.vtt'
    }

    await servers[0].runnerJobs.success({ runnerToken, jobUUID: job.uuid, jobToken: job.jobToken, payload })

    await waitJobs(servers)

    for (const server of servers) {
      const video = await server.videos.get({ id: uuid })
      expect(video.language.id).to.be.null

      const { total, data } = await server.captions.list({ videoId: uuid })
      expect(total).to.equal(0)
      expect(data).to.have.lengthOf(0)
    }
  })

  after(async function () {
    await cleanupTests(servers)
  })
})
@ -0,0 +1,145 @@
|
||||
/* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/require-await */ |
||||
|
||||
import { VideoPrivacy } from '@peertube/peertube-models' |
||||
import { |
||||
PeerTubeServer, |
||||
cleanupTests, |
||||
createMultipleServers, |
||||
doubleFollow, |
||||
sendRTMPStream, |
||||
setAccessTokensToServers, |
||||
setDefaultVideoChannel, |
||||
stopFfmpeg, |
||||
waitJobs |
||||
} from '@peertube/peertube-server-commands' |
||||
import { FIXTURE_URLS } from '@tests/shared/fixture-urls.js' |
||||
import { checkCaption, checkLanguage, checkNoCaption, uploadForTranscription } from '@tests/shared/transcription.js' |
||||
|
||||
// Checks when a transcription is generated: on request, on upload, on import and after a live replay
describe('Test video transcription', function () {
  let servers: PeerTubeServer[]

  before(async function () {
    this.timeout(60000)

    servers = await createMultipleServers(2)

    await setAccessTokensToServers(servers)
    await setDefaultVideoChannel(servers)
    await doubleFollow(servers[0], servers[1])

    await waitJobs(servers)
    await waitJobs(servers)
  })

  // ---------------------------------------------------------------------------

  it('Should generate a transcription on request', async function () {
    this.timeout(360000)

    await servers[0].config.disableTranscription()

    const uuid = await uploadForTranscription(servers[0])
    await waitJobs(servers)
    await checkLanguage(servers, uuid, null)

    // Transcription stays enabled for the following tests
    await servers[0].config.enableTranscription()

    await servers[0].captions.runGenerate({ videoId: uuid })
    await waitJobs(servers)
    await checkLanguage(servers, uuid, 'en')

    await checkCaption(servers, uuid)
  })

  it('Should run transcription on upload by default', async function () {
    this.timeout(360000)

    const uuid = await uploadForTranscription(servers[0])

    await waitJobs(servers)
    await checkCaption(servers, uuid)
    await checkLanguage(servers, uuid, 'en')
  })

  it('Should run transcription on import by default', async function () {
    this.timeout(360000)

    const { video } = await servers[0].videoImports.importVideo({
      attributes: {
        privacy: VideoPrivacy.PUBLIC,
        targetUrl: FIXTURE_URLS.transcriptionVideo,
        language: undefined
      }
    })

    await waitJobs(servers)
    await checkCaption(servers, video.uuid)
    await checkLanguage(servers, video.uuid, 'en')
  })

  it('Should run transcription when live ended', async function () {
    this.timeout(360000)

    await servers[0].config.enableMinimumTranscoding()
    await servers[0].config.enableLive({ allowReplay: true, transcoding: true, resolutions: 'min' })

    const { live, video } = await servers[0].live.quickCreate({
      saveReplay: true,
      permanentLive: false,
      privacy: VideoPrivacy.PUBLIC
    })

    const ffmpegCommand = sendRTMPStream({ rtmpBaseUrl: live.rtmpUrl, streamKey: live.streamKey })
    await servers[0].live.waitUntilPublished({ videoId: video.id })

    await stopFfmpeg(ffmpegCommand)

    await servers[0].live.waitUntilReplacedByReplay({ videoId: video.id })
    await waitJobs(servers)
    // Only the beginning of the first cue timestamp is checked for the replay
    await checkCaption(servers, video.uuid, 'WEBVTT\n\n00:')
    await checkLanguage(servers, video.uuid, 'en')

    await servers[0].config.enableLive({ allowReplay: false })
    await servers[0].config.disableTranscoding()
  })

  it('Should not run transcription if disabled by user', async function () {
    this.timeout(120000)

    {
      const uuid = await uploadForTranscription(servers[0], { generateTranscription: false })

      await waitJobs(servers)
      await checkNoCaption(servers, uuid)
      await checkLanguage(servers, uuid, null)
    }

    {
      const { video } = await servers[0].videoImports.importVideo({
        attributes: {
          privacy: VideoPrivacy.PUBLIC,
          targetUrl: FIXTURE_URLS.transcriptionVideo,
          generateTranscription: false
        }
      })

      await waitJobs(servers)
      await checkNoCaption(servers, video.uuid)
      await checkLanguage(servers, video.uuid, null)
    }
  })

  it('Should not run a transcription if the video does not contain audio', async function () {
    this.timeout(120000)

    // Upload a fixture without audio stream and keep the default generateTranscription value,
    // otherwise this test would only repeat the "disabled by user" case above
    const uuid = await uploadForTranscription(servers[0], { fixture: 'video_short_no_audio.mp4' })

    await waitJobs(servers)
    await checkNoCaption(servers, uuid)
    await checkLanguage(servers, uuid, null)
  })

  after(async function () {
    await cleanupTests(servers)
  })
})
@ -0,0 +1,89 @@
|
||||
/* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/require-await */ |
||||
|
||||
import { wait } from '@peertube/peertube-core-utils' |
||||
import { RunnerJobState } from '@peertube/peertube-models' |
||||
import { |
||||
PeerTubeServer, |
||||
cleanupTests, |
||||
createMultipleServers, |
||||
doubleFollow, |
||||
setAccessTokensToServers, |
||||
setDefaultVideoChannel, |
||||
waitJobs |
||||
} from '@peertube/peertube-server-commands' |
||||
import { checkPeerTubeRunnerCacheIsEmpty } from '@tests/shared/directories.js' |
||||
import { PeerTubeRunnerProcess } from '@tests/shared/peertube-runner-process.js' |
||||
import { checkCaption, checkLanguage, checkNoCaption, uploadForTranscription } from '@tests/shared/transcription.js' |
||||
|
||||
// Runs remote transcription jobs through a real peertube-runner process registered on the first server
describe('Test transcription in peertube-runner program', function () {
  let servers: PeerTubeServer[] = []
  let peertubeRunner: PeerTubeRunnerProcess

  before(async function () {
    this.timeout(120_000)

    servers = await createMultipleServers(2)

    await setAccessTokensToServers(servers)
    await setDefaultVideoChannel(servers)

    await doubleFollow(servers[0], servers[1])

    await servers[0].config.enableTranscription({ remote: true })

    const registrationToken = await servers[0].runnerRegistrationTokens.getFirstRegistrationToken()

    peertubeRunner = new PeerTubeRunnerProcess(servers[0])
    await peertubeRunner.runServer()
    await peertubeRunner.registerPeerTubeInstance({ registrationToken, runnerName: 'runner' })
  })

  describe('Running transcription', function () {

    it('Should run transcription on classic file', async function () {
      this.timeout(360000)

      const videoUUID = await uploadForTranscription(servers[0])
      await waitJobs(servers, { runnerJobs: true })

      await checkCaption(servers, videoUUID)
      await checkLanguage(servers, videoUUID, 'en')
    })

    it('Should not run transcription on video without audio stream', async function () {
      this.timeout(360000)

      const videoUUID = await uploadForTranscription(servers[0], { fixture: 'video_short_no_audio.mp4' })

      await waitJobs(servers)

      // Poll every 500ms until an errored transcription runner job is listed
      let erroredJobFound = false

      do {
        await wait(500)

        const { data } = await servers[0].runnerJobs.list({ stateOneOf: [ RunnerJobState.ERRORED ] })

        erroredJobFound = data.some(runnerJob => runnerJob.type === 'video-transcription')
      } while (!erroredJobFound)

      await checkNoCaption(servers, videoUUID)
      await checkLanguage(servers, videoUUID, null)
    })
  })

  describe('Check cleanup', function () {

    it('Should have an empty cache directory', async function () {
      await checkPeerTubeRunnerCacheIsEmpty(peertubeRunner, 'transcription')
    })
  })

  after(async function () {
    if (peertubeRunner) {
      await peertubeRunner.unregisterPeerTubeInstance({ runnerName: 'runner' })
      peertubeRunner.kill()
    }

    await cleanupTests(servers)
  })
})
@ -0,0 +1,81 @@
|
||||
/* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/require-await */ |
||||
|
||||
import { buildAbsoluteFixturePath } from '@peertube/peertube-node-utils' |
||||
import { PeerTubeServer, VideoEdit } from '@peertube/peertube-server-commands' |
||||
import { downloadFile, unzip } from '@peertube/peertube-transcription-devtools' |
||||
import { expect } from 'chai' |
||||
import { ensureDir, pathExists } from 'fs-extra/esm' |
||||
import { join } from 'path' |
||||
import { testCaptionFile } from './captions.js' |
||||
import { FIXTURE_URLS } from './fixture-urls.js' |
||||
|
||||
type CustomModelName = 'tiny.pt' | 'faster-whisper-tiny' |
||||
|
||||
/**
 * Download and unzip the transcription models archive into the custom model directory,
 * unless the path of the requested model already exists.
 */
export async function downloadCustomModelsIfNeeded (modelName: CustomModelName) {
  const alreadyThere = await pathExists(getCustomModelPath(modelName))
  if (alreadyThere) return

  const modelsDirectory = getCustomModelDirectory()
  await ensureDir(modelsDirectory)

  const archivePath = await downloadFile(FIXTURE_URLS.transcriptionModels, modelsDirectory)
  await unzip(archivePath)
}
||||
|
||||
/** Absolute fixture path of the `transcription/models-v1` directory. */
export function getCustomModelDirectory () {
  const relativeDirectory = join('transcription', 'models-v1')

  return buildAbsoluteFixturePath(relativeDirectory)
}
||||
|
||||
/** Path of the given model inside the `models` sub-directory of the custom model directory. */
export function getCustomModelPath (modelName: CustomModelName) {
  const baseDirectory = getCustomModelDirectory()

  return join(baseDirectory, 'models', modelName)
}
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Expect every server to list exactly one English caption for the video,
 * and check the caption file against `captionContains` with `testCaptionFile`.
 */
export async function checkCaption (servers: PeerTubeServer[], uuid: string, captionContains = 'WEBVTT\n\n00:00.000 --> 00:') {
  for (const server of servers) {
    const { total, data } = await server.captions.list({ videoId: uuid })
    expect(total).to.equal(1)
    expect(data).to.have.lengthOf(1)

    const { language, captionPath } = data[0]
    expect(language.id).to.equal('en')
    expect(language.label).to.equal('English')

    await testCaptionFile(server.url, captionPath, captionContains)
  }
}
||||
|
||||
/** Expect every server to list no caption for the video. */
export async function checkNoCaption (servers: PeerTubeServer[], uuid: string) {
  for (const server of servers) {
    const { total, data } = await server.captions.list({ videoId: uuid })

    expect(total).to.equal(0)
    expect(data).to.have.lengthOf(0)
  }
}
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Expect the video language id to equal `expected` on every server,
 * or to be null when `expected` is falsy.
 */
export async function checkLanguage (servers: PeerTubeServer[], uuid: string, expected: string | null) {
  for (const server of servers) {
    const { language } = await server.videos.get({ id: uuid })

    if (!expected) {
      expect(language.id).to.be.null
      continue
    }

    expect(language.id).to.equal(expected)
  }
}
||||
|
||||
/**
 * Upload the default transcription fixture video without a language and return its UUID.
 * Attributes from `body` override the defaults.
 */
export async function uploadForTranscription (server: PeerTubeServer, body: Partial<VideoEdit> = {}) {
  const uploaded = await server.videos.upload({
    attributes: {
      name: 'video',
      fixture: join('transcription', 'videos', 'the_last_man_on_earth.mp4'),
      language: undefined,

      // Spread last so the caller can override any default above
      ...body
    }
  })

  return uploaded.uuid
}
@ -1,17 +1,16 @@
|
||||
import { transcriberFactory } from '@peertube/peertube-transcription' |
||||
import { TranscriptionEngineName, transcriberFactory } from '@peertube/peertube-transcription' |
||||
import { createLogger } from 'winston' |
||||
|
||||
// Checks that the factory can build a transcriber for every listed engine name
describe('Transcriber factory', function () {
  const transcribers: TranscriptionEngineName[] = [ 'openai-whisper', 'whisper-ctranslate2' ]

  describe('Should be able to create a transcriber for each available transcription engine', function () {

    for (const transcriberName of transcribers) {
      it(`Should be able to create a(n) ${transcriberName} transcriber`, function () {
        transcriberFactory.createFromEngineName({ engineName: transcriberName, logger: createLogger() })
      })
    }

  })
})
||||
|
@ -1 +0,0 @@
|
||||
describe('Transcription run', function () {}) |
@ -1,133 +0,0 @@
|
||||
/* eslint-disable @typescript-eslint/no-unused-expressions, max-len */ |
||||
import { expect, config } from 'chai' |
||||
import { createLogger } from 'winston' |
||||
import { join } from 'node:path' |
||||
import { mkdir, rm } from 'node:fs/promises' |
||||
import { tmpdir } from 'node:os' |
||||
import { buildAbsoluteFixturePath } from '@peertube/peertube-node-utils' |
||||
import { |
||||
OpenaiTranscriber, |
||||
WhisperTimestampedTranscriber, |
||||
TranscriptFileEvaluator, |
||||
TranscriptionModel, |
||||
WhisperTranscribeArgs, |
||||
levenshteinDistance, downloadFile, unzip |
||||
} from '@peertube/peertube-transcription' |
||||
import { FIXTURE_URLS } from '@tests/shared/fixture-urls.js' |
||||
|
||||
config.truncateThreshold = 0 |
||||
|
||||
/**
 * Test suite for `WhisperTimestampedTranscriber`, configured to run the `whisper_timestamped` binary.
 *
 * Each test transcribes a fixture video and checks the format, language and/or content
 * of the resulting transcript. The last test compares the output with the one produced by
 * `OpenaiTranscriber` (configured to run the `whisper` binary) on the same input.
 */
describe('Linto timestamped Whisper transcriber', function () {
  // Working directories, all located under the OS temporary directory
  const tmpDirectory = join(tmpdir(), 'peertube-transcription')
  const transcriptDirectory = join(tmpDirectory, 'transcriber', 'timestamped')
  const modelsDirectory = join(tmpDirectory, 'models')

  // Media fixtures: the first one is transcribed as English, the second one as French
  const shortVideoPath = buildAbsoluteFixturePath('transcription/videos/the_last_man_on_earth.mp4')
  const frVideoPath = buildAbsoluteFixturePath('transcription/videos/derive_sectaire.mp4')

  // Transcriber under test, writing its transcripts into `transcriptDirectory`
  const transcriber = new WhisperTimestampedTranscriber(
    {
      name: 'whisper-timestamped',
      requirements: [],
      type: 'binary',
      binary: 'whisper_timestamped',
      supportedModelFormats: [ 'PyTorch' ],
      languageDetection: true
    },
    createLogger(),
    transcriptDirectory
  )

  before(async function () {
    this.timeout(1 * 1000 * 60)

    await mkdir(transcriptDirectory, { recursive: true })

    // Download the models archive into `tmpDirectory` and extract it.
    // NOTE(review): the tests below expect `tiny.pt` in `modelsDirectory`; presumably the archive
    // contains a `models` directory — TODO confirm
    await unzip(await downloadFile(FIXTURE_URLS.transcriptionModels, tmpDirectory))
  })

  it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () {
    this.timeout(1 * 1000 * 60)

    const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en' })

    expect(transcript.format).to.equals('vtt')
    expect(transcript.language).to.equals('en')
    expect(await transcript.read()).not.to.be.empty
  })

  it('May produce a transcript file in the `srt` format with a ms precision', async function () {
    // Same input as the `vtt` test above, so give it the same time budget
    this.timeout(1 * 1000 * 60)

    const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'srt' })

    expect(transcript.format).to.equals('srt')
    expect(transcript.language).to.equals('en')
    expect(await transcript.read()).not.to.be.empty
  })

  it('May produce a transcript file in `txt` format', async function () {
    // Same input as the `vtt` test above, so give it the same time budget
    this.timeout(1 * 1000 * 60)

    const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'txt' })

    expect(transcript.format).to.equals('txt')
    expect(transcript.language).to.equals('en')

    // Read the transcript once and reuse its content for both assertions
    const content = await transcript.read()
    expect(content).not.to.be.empty

    // The output is not expected to match exactly: tolerate a small edit distance with the reference sentence
    expect(levenshteinDistance(
      content.toString(),
      'December 1965, is that all it has been since I inherited the world only three years, seems like a hundred million.'
    )).to.be.below(10)
  })

  it('May transcribe a media file using a local PyTorch model file', async function () {
    this.timeout(2 * 1000 * 60)

    // No assertion on the result: the test passes as long as the transcription does not reject
    await transcriber.transcribe({
      mediaFilePath: shortVideoPath,
      model: await TranscriptionModel.fromPath(join(modelsDirectory, 'tiny.pt')),
      language: 'en'
    })
  })

  it('May transcribe a media file in french', async function () {
    this.timeout(2 * 1000 * 60)

    const transcript = await transcriber.transcribe({
      mediaFilePath: frVideoPath,
      language: 'fr',
      format: 'txt'
    })

    expect(transcript.format).to.equals('txt')
    expect(transcript.language).to.equals('fr')
    expect(await transcript.read()).not.to.be.empty
  })

  it('Guesses the video language if not provided', async function () {
    this.timeout(2 * 1000 * 60)

    // No `language` option here: the transcript language has to come from the transcriber itself
    const transcript = await transcriber.transcribe({ mediaFilePath: frVideoPath })

    expect(transcript.language).to.equals('fr')
  })

  it('Should produce a text transcript similar to openai-whisper implementation', async function () {
    this.timeout(11 * 1000 * 60)

    // Both transcribers receive the exact same arguments so their outputs are comparable
    const transcribeArgs: WhisperTranscribeArgs = {
      mediaFilePath: frVideoPath,
      model: await TranscriptionModel.fromPath(join(modelsDirectory, 'tiny.pt')),
      language: 'fr',
      format: 'txt'
    }
    const transcript = await transcriber.transcribe(transcribeArgs)

    // Comparison transcriber, writing into its own sub-directory of `transcriptDirectory`
    const openaiTranscriber = new OpenaiTranscriber(
      {
        name: 'openai-whisper',
        requirements: [],
        type: 'binary',
        binary: 'whisper',
        supportedModelFormats: [ 'PyTorch' ]
      },
      createLogger(),
      join(transcriptDirectory, 'openai-whisper')
    )
    const openaiTranscript = await openaiTranscriber.transcribe(transcribeArgs)

    // NOTE(review): presumably the first argument is the reference and the second one the hypothesis — TODO confirm
    const transcriptFileEvaluator = new TranscriptFileEvaluator(openaiTranscript, transcript)

    // Error rates are compared as ratios: 25 / 100 stands for 25%
    expect(await transcriptFileEvaluator.wer()).to.be.below(25 / 100)
    expect(await transcriptFileEvaluator.cer()).to.be.below(15 / 100)
  })

  after(async function () {
    // Only the transcripts are removed: the rest of `tmpDirectory` (including the downloaded models) is kept
    await rm(transcriptDirectory, { recursive: true, force: true })
  })
})
@ -0,0 +1,63 @@
|
||||
# Transcription DevTools |
||||
|
||||
Includes: |
||||
* __JiWER__ CLI NodeJS wrapper |
||||
* Benchmark tool to test multiple transcription engines |
||||
* TypeScript classes to evaluate the word error rate of the transcript files generated by the transcription engines |
||||
|
||||
## Build |
||||
|
||||
```sh |
||||
npm run build |
||||
``` |
||||
|
||||
## Benchmark |
||||
|
||||
A benchmark of the available __transcribers__ can be run with: |
||||
```sh |
||||
npm run benchmark |
||||
``` |
||||
``` |
||||
┌────────────────────────┬───────────────────────┬───────────────────────┬──────────┬────────┬───────────────────────┐ |
||||
│ (index) │ WER │ CER │ duration │ model │ engine │ |
||||
├────────────────────────┼───────────────────────┼───────────────────────┼──────────┼────────┼───────────────────────┤ |
||||
│ 5yZGBYqojXe7nuhq1TuHvz │ '28.39506172839506%' │ '9.62457337883959%' │ '41s' │ 'tiny' │ 'openai-whisper' │ |
||||
│ x6qREJ2AkTU4e5YmvfivQN │ '29.75206611570248%' │ '10.46195652173913%' │ '15s' │ 'tiny' │ 'whisper-ctranslate2' │ |
||||
└────────────────────────┴───────────────────────┴───────────────────────┴──────────┴────────┴───────────────────────┘ |
||||
``` |
||||
|
||||
The benchmark can also be run against several built-in model sizes: |
||||
|
||||
```sh |
||||
MODELS=tiny,small,large npm run benchmark |
||||
``` |
||||
|
||||
## Jiwer |
||||
|
||||
> *JiWER is a python tool for computing the word-error-rate of ASR systems.* |
||||
> https://jitsi.github.io/jiwer/cli/ |
||||
|
||||
__JiWER__ serves as a reference implementation to calculate error rates between two text files: |
||||
- WER (Word Error Rate) |
||||
- CER (Character Error Rate) |
||||
|
||||
|
||||
### Usage |
||||
|
||||
```typescript |
||||
const jiwerCLI = new JiwerClI('./reference.txt', './hypothesis.txt') |
||||
|
||||
// WER as a ratio, e.g. 0.03 means 3% |
||||
console.log(await jiwerCLI.wer()) |
||||
|
||||
// CER as a ratio, e.g. 0.01 means 1% |
||||
console.log(await jiwerCLI.cer()) |
||||
|
||||
// Detailed comparison report |
||||
console.log(await jiwerCLI.alignment()) |
||||
``` |
||||
|
||||
## Resources |
||||
|
||||
- https://jitsi.github.io/jiwer/ |
||||
- https://github.com/rapidfuzz/RapidFuzz |
@ -0,0 +1,5 @@
|
||||
export * from './jiwer-cli.js' |
||||
export * from './levenshtein.js' |
||||
export * from './transcript-file-evaluator-interface.js' |
||||
export * from './transcript-file-evaluator.js' |
||||
export * from './utils.js' |
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue