Skip to content

Commit

Permalink
Move export to google drive functionality to export-media lambda
Browse files Browse the repository at this point in the history
  • Loading branch information
philmcmahon committed Jan 10, 2025
1 parent c803341 commit f2e538f
Show file tree
Hide file tree
Showing 11 changed files with 198 additions and 184 deletions.
62 changes: 1 addition & 61 deletions packages/api/src/export.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import {
downloadObject,
getObjectSize,
getObjectText,
isS3Failure,
logger,
Expand All @@ -12,70 +10,12 @@ import {
ExportStatus,
ExportStatuses,
ExportType,
ZTokenResponse,
} from '@guardian/transcription-service-common';
import {
uploadFileToGoogleDrive,
uploadToGoogleDocs,
} from './services/googleDrive';
import { uploadToGoogleDocs } from './services/googleDrive';
import { S3Client } from '@aws-sdk/client-s3';
import { docs_v1, drive_v3 } from 'googleapis';
import Drive = drive_v3.Drive;
import Docs = docs_v1.Docs;
import { LAMBDA_MAX_EPHEMERAL_STORAGE_BYTES } from './services/lambda';

/**
 * Exports the source media for a transcription item from S3 to Google Drive.
 *
 * The object is downloaded to a local file under /tmp and then uploaded from
 * there. Objects larger than LAMBDA_MAX_EPHEMERAL_STORAGE_BYTES are not
 * attempted; a 'failure' status is returned instead. Errors thrown by the
 * download or upload helpers are not caught here and propagate to the caller.
 *
 * @param config - service config; only `app.sourceMediaBucket` is read here
 * @param s3Client - client used to size and download the object
 * @param item - transcription item; `id` is used as the object key and
 *   `originalFilename` as the basis for the uploaded file name
 * @param oAuthTokenResponse - passed through to uploadFileToGoogleDrive
 * @param folderId - passed through to uploadFileToGoogleDrive
 *   (presumably the destination Drive folder — confirm against that helper)
 * @returns a 'source-media' export status: 'success' with the uploaded file
 *   id, or 'failure' with a user-facing message
 */
export const exportMediaToDrive = async (
  config: TranscriptionConfig,
  s3Client: S3Client,
  item: TranscriptionDynamoItem,
  oAuthTokenResponse: ZTokenResponse,
  folderId: string,
): Promise<ExportStatus> => {
  logger.info(`Starting source media export`);
  const mediaSize = await getObjectSize(
    s3Client,
    config.app.sourceMediaBucket,
    item.id,
  );
  // The file has to fit in local storage before it can be uploaded, so bail
  // out early when the size is known and exceeds the limit.
  if (mediaSize && mediaSize > LAMBDA_MAX_EPHEMERAL_STORAGE_BYTES) {
    const msg = `Media file too large to export to google drive. Please manually download the file and upload using the google drive UI`;
    return {
      exportType: 'source-media',
      status: 'failure',
      message: msg,
    };
  }

  // String.replace with a string pattern only replaces the FIRST match, which
  // would leave later '/' characters in the id and produce a nested path under
  // /tmp. A global regex flattens every separator.
  const filePath = `/tmp/${item.id.replace(/\//g, '_')}`;
  const extension = await downloadObject(
    s3Client,
    config.app.sourceMediaBucket,
    item.id,
    filePath,
  );

  const mimeType = 'application/octet-stream';

  // default to mp4 on the assumption that most media exported will be video
  const extensionOrMp4 = extension || 'mp4';

  // Only append the extension when the original name does not already end with it.
  const fileName = item.originalFilename.endsWith(`.${extensionOrMp4}`)
    ? item.originalFilename
    : `${item.originalFilename}.${extensionOrMp4}`;

  const id = await uploadFileToGoogleDrive(
    fileName,
    oAuthTokenResponse,
    filePath,
    mimeType,
    folderId,
  );
  logger.info(`Source media export complete, file id: ${id}`);
  return {
    exportType: 'source-media',
    id,
    status: 'success',
  };
};

export const exportTranscriptToDoc = async (
config: TranscriptionConfig,
Expand Down
15 changes: 13 additions & 2 deletions packages/api/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import {
CreateFolderRequest,
signedUrlRequestBody,
ExportStatuses,
ExportStatus,
} from '@guardian/transcription-service-common';
import type { SignedUrlResponseBody } from '@guardian/transcription-service-common';
import {
Expand Down Expand Up @@ -340,8 +341,18 @@ const getApp = async () => {
JSON.stringify(exportRequest.data),
);
} catch (e) {
logger.error('Failed to invoke media export lambda', e);
res.status(500).send('Failed to request media export');
const msg = 'Failed to invoke media export lambda';
logger.error(msg, e);
const mediaFailedStatus: ExportStatus = {
status: 'failure',
exportType: 'source-media',
message: msg,
};
currentStatuses = updateStatus(mediaFailedStatus, currentStatuses);
await writeTranscriptionItem(dynamoClient, config.app.tableName, {
...item,
exportStatus: currentStatuses,
});
}
res.send(JSON.stringify(currentStatuses));

Expand Down
8 changes: 7 additions & 1 deletion packages/api/src/services/googleDrive.ts
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ export const uploadFileToGoogleDrive = async (
let offset = 0;

for await (const chunk of fileStream) {
// pause the stream to prevent node from buffering any more data whilst we upload
fileStream.pause();
const chunkSize = chunk.length;
const range = `bytes ${offset}-${offset + chunkSize - 1}/${fileSize}`;

Expand All @@ -193,7 +195,9 @@ export const uploadFileToGoogleDrive = async (
// Response status is 308 until the final chunk. Final response includes file metadata
return ((await response.json()) as { id: string }).id;
}
if (response.status !== 308) {
if (response.status === 308) {
// 308 means google expects more data - nothing to do here, carry on to the next chunk
} else {
const text = await response.text();
logger.error(`Received ${response.status} from google, error: ${text}`);
throw new Error(
Expand All @@ -202,6 +206,8 @@ export const uploadFileToGoogleDrive = async (
}

offset += chunkSize;
fileStream.resume();
}

throw new Error('Failed to upload file');
};
3 changes: 2 additions & 1 deletion packages/api/src/services/lambda.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ export const invokeLambda = async (

const response = await lambdaClient.send(command);

if (response.StatusCode === 200) {
// see https://docs.aws.amazon.com/lambda/latest/api/API_Invoke.html for details of the response
if (response.StatusCode === 200 || response.StatusCode === 202) {
logger.info('Invocation successful');
return;
} else {
Expand Down
8 changes: 5 additions & 3 deletions packages/backend-common/src/s3.ts
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ export const downloadObject = async (
return data.Metadata?.['extension'];
};

/**
 * Converts a byte count to whole megabytes, rounding down.
 * One "MB" here is 1024 * 1024 bytes.
 */
const bytesToMB = (bytes: number): number => {
  const bytesPerMB = 1024 * 1024;
  return Math.floor(bytes / bytesPerMB);
};

const downloadS3Data = async (
data: Readable,
destinationPath: string,
Expand All @@ -172,9 +174,10 @@ const downloadS3Data = async (
) => {
let downloadedBytes = 0;
let lastLoggedPercentage = 0;
const contentLengthMb = contentLength && bytesToMB(contentLength);
data.on('data', (chunk) => {
downloadedBytes += chunk.length;
if (contentLength) {
if (contentLength && contentLengthMb) {
const percentage = Math.floor((downloadedBytes / contentLength) * 100);
if (
downloadedBytes > 0 &&
Expand All @@ -183,7 +186,7 @@ const downloadS3Data = async (
) {
lastLoggedPercentage = percentage;
logger.info(
`Downloaded ${downloadedBytes} of ${contentLength} bytes so far ${contentLength ? `(${percentage}%)` : ''} for ${key}`,
`Downloaded ${bytesToMB(downloadedBytes)} of ${contentLengthMb} MB so far (${percentage}%) for ${key}`,
);
}
}
Expand All @@ -193,7 +196,6 @@ const downloadS3Data = async (
data
.pipe(stream)
.on('finish', () => {
logger.debug('stream pipe done');
resolve();
})
.on('error', (error) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2125,9 +2125,6 @@ service transcription-service-worker start",
"STAGE": "TEST",
},
},
"EphemeralStorage": {
"Size": 10240,
},
"Handler": "index.api",
"LoggingConfig": {
"LogFormat": "JSON",
Expand Down Expand Up @@ -2162,7 +2159,7 @@ service transcription-service-worker start",
"Value": "TEST",
},
],
"Timeout": 900,
"Timeout": 30,
},
"Type": "AWS::Lambda::Function",
},
Expand Down Expand Up @@ -2933,11 +2930,14 @@ service transcription-service-worker start",
"STAGE": "TEST",
},
},
"EphemeralStorage": {
"Size": 10240,
},
"Handler": "index.mediaExport",
"LoggingConfig": {
"LogFormat": "JSON",
},
"MemorySize": 512,
"MemorySize": 2048,
"Role": {
"Fn::GetAtt": [
"transcriptionservicemediaexportServiceRoleF9FB6814",
Expand Down Expand Up @@ -2967,7 +2967,7 @@ service transcription-service-worker start",
"Value": "TEST",
},
],
"Timeout": 30,
"Timeout": 900,
},
"Type": "AWS::Lambda::Function",
},
Expand Down
29 changes: 26 additions & 3 deletions packages/cdk/lib/transcription-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ import { Topic } from 'aws-cdk-lib/aws-sns';
import { Queue } from 'aws-cdk-lib/aws-sqs';
import { JsonPath } from 'aws-cdk-lib/aws-stepfunctions';

/**
 * Returns the text after the last ':' in the given string - for an SNS topic
 * ARN that is the topic name. A string with no ':' is returned unchanged.
 *
 * NOTE(review): this is plain string handling. If the caller passes an
 * unresolved CDK token (e.g. a CloudFormation parameter's valueAsString) the
 * colons are not visible at synth time - confirm callers pass a concrete ARN.
 */
const topicArnToName = (topicArn: string) => {
  const lastSeparator = topicArn.lastIndexOf(':');
  return topicArn.slice(lastSeparator + 1);
};

export class TranscriptionService extends GuStack {
constructor(scope: App, id: string, props: GuStackProps) {
super(scope, id, props);
Expand Down Expand Up @@ -200,9 +205,6 @@ export class TranscriptionService extends GuStack {
noMonitoring: true,
},
app: `${APP_NAME}-api`,
ephemeralStorageSize: Size.mebibytes(10240),
memorySize: 512,
timeout: Duration.seconds(900),
api: {
id: apiId,
description: 'API for transcription service frontend',
Expand Down Expand Up @@ -534,6 +536,8 @@ export class TranscriptionService extends GuStack {
},
).valueAsString;

const alarmTopicName = topicArnToName(alarmTopicArn);

const mediaDownloadTask = new GuEcsTask(this, 'media-download-task', {
app: mediaDownloadApp,
vpc,
Expand Down Expand Up @@ -696,6 +700,14 @@ export class TranscriptionService extends GuStack {
handler: 'index.outputHandler',
runtime: Runtime.NODEJS_20_X,
app: `${APP_NAME}-output-handler`,
errorPercentageMonitoring:
this.stage === 'PROD'
? {
toleratedErrorPercentage: 0,
noMonitoring: false,
snsTopicName: alarmTopicName,
}
: undefined,
},
);

Expand Down Expand Up @@ -737,6 +749,17 @@ export class TranscriptionService extends GuStack {
handler: 'index.mediaExport',
runtime: Runtime.NODEJS_20_X,
app: `${APP_NAME}-media-export`,
ephemeralStorageSize: Size.mebibytes(10240),
memorySize: 2048,
timeout: Duration.seconds(900),
errorPercentageMonitoring:
this.stage === 'PROD'
? {
toleratedErrorPercentage: 0,
noMonitoring: false,
snsTopicName: alarmTopicName,
}
: undefined,
},
);

Expand Down
38 changes: 21 additions & 17 deletions packages/client/src/components/ExportForm.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -65,27 +65,39 @@ const statusToMessage = (status: RequestStatus): string => {
return 'One or more exports failed. See below for details';
case RequestStatus.Success:
return 'All exports complete. See below for links to your files';
case RequestStatus.CreatingFolder:
return "Export in progress... If nothing happens, make sure that your browser isn't blocking pop-ups.";
case RequestStatus.TranscriptExportInProgress:
return 'Export in progress. Links to your files will soon appear below.';
case RequestStatus.InProgress:
return 'Export in progress. Your transcript text should be available immediately, input media may take a few minutes. Use the button below to check the folder where exported items will be saved';
return `Export in progress. See below for links to your transcript files. Source media may take several
minutes - you can stay on this page or click the button to open the google drive folder and wait
for it to appear there.`;
case RequestStatus.Ready:
default:
return '';
}
};

/**
 * Builds the initial status list for a set of requested exports: one entry
 * per export type, each marked 'in-progress', in the same order as the input.
 */
const exportTypesToStatus = (exportTypes: ExportType[]): ExportStatuses =>
  exportTypes.map((exportType) => ({
    status: 'in-progress',
    exportType,
  }));

const ExportForm = () => {
const { token } = useContext(AuthContext);
const searchParams = useSearchParams();
const [folderId, setFolderId] = useState<string | undefined>();
const [creatingFolder, setCreatingFolder] = useState(false);
const [failureMessage, setFailureMessage] = useState<string>('');
const [requestStatus, setRequestStatus] = useState<RequestStatus>(
RequestStatus.Ready,
);
const [exportTypesRequested, setExportTypesRequested] = useState<
ExportType[]
>(['text']);
const [exportStatuses, setExportStatuses] = useState<ExportStatus[]>([]);
const [exportStatuses, setExportStatuses] = useState<ExportStatuses>([]);

// TODO: once we have some CSS/component library, tidy up this messy error handling
if (!token) {
Expand All @@ -111,17 +123,8 @@ const ExportForm = () => {
/>
);
}
if (creatingFolder) {
return (
<InfoMessage
message={
"Export in progress... If nothing happens, make sure that your browser isn't blocking pop-ups."
}
status={RequestStatus.InProgress}
/>
);
}
if (folderId) {

if (requestStatus !== RequestStatus.Ready) {
return (
<>
<div className="mb-6">
Expand Down Expand Up @@ -208,7 +211,7 @@ const ExportForm = () => {
};

const exportHandler = async () => {
setCreatingFolder(true);
setRequestStatus(RequestStatus.CreatingFolder);
try {
const tokenResponse = await getOAuthToken(token);
const createFolderResponse = await createExportFolder(
Expand All @@ -223,8 +226,8 @@ const ExportForm = () => {
return;
}
const folderId = await createFolderResponse.text();
setCreatingFolder(false);
setRequestStatus(RequestStatus.InProgress);
setRequestStatus(RequestStatus.TranscriptExportInProgress);
setExportStatuses(exportTypesToStatus(exportTypesRequested));
setFolderId(folderId);
const exportResponse = await exportTranscript(
token,
Expand All @@ -250,6 +253,7 @@ const ExportForm = () => {
return;
}
await updateStatuses();
setRequestStatus(RequestStatus.InProgress);
setExportStatuses(parsedResponse.data);
} catch (error) {
console.error('Export failed', error);
Expand Down
2 changes: 2 additions & 0 deletions packages/client/src/components/InfoMessage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import { ExportStatus } from '@guardian/transcription-service-common';
export const iconForStatus = (status: RequestStatus) => {
switch (status) {
case RequestStatus.InProgress:
case RequestStatus.CreatingFolder:
case RequestStatus.TranscriptExportInProgress:
return <Spinner className={'w-6 h-6'} />;
case RequestStatus.Invalid:
return <ExclamationCircleIcon className={'w-6 h-6 text-red-500'} />;
Expand Down
Loading

0 comments on commit f2e538f

Please sign in to comment.