Mercurial > public > finance-parser
view analyze_document/app.py @ 9:bf19235a9636
minor bugs and add sample reports
author | Dennis C. M. <dennis@denniscm.com> |
---|---|
date | Wed, 07 Jun 2023 11:07:14 +0100 |
parents | d15ccf5f1373 |
children | 2350662483a3 |
line wrap: on
line source
import json
import re
import uuid

import boto3

textract_client = boto3.client('textract')
s3_client = boto3.client('s3')

# Expected key layout: unprocessed/<ticker>_<doc type>_<anything>.
# Each field is matched with a negated character class rather than a greedy
# ``.*``: a greedy match runs to the LAST underscore in the key, which left
# nothing for the doc-type search to match.
_OBJECT_KEY_PATTERN = re.compile(r'unprocessed/([^_/]+)_([^_/]+)_')


def _parse_object_key(object_key):
    """Extract ``(company_ticker, doc_type)`` from an ``unprocessed/`` key.

    Args:
        object_key: S3 object key containing
            ``unprocessed/<ticker>_<doc type>_<rest>``.

    Returns:
        A ``(company_ticker, doc_type)`` tuple of non-empty strings.

    Raises:
        ValueError: If the key does not contain the expected layout.
    """
    match = _OBJECT_KEY_PATTERN.search(object_key)
    if match is None:
        raise ValueError(
            f'Object key {object_key!r} does not match '
            f"'unprocessed/<ticker>_<doc type>_<rest>'"
        )
    return match.group(1), match.group(2)


def lambda_handler(event, context):
    """Analyze an uploaded document with Textract and store the result in S3.

    Reads the bucket name and object key from ``event['detail']``, runs
    Textract table analysis on that object, writes the JSON response to
    ``analyzed/<ticker>_<doc type>_<uuid>.json`` in the same bucket, and then
    deletes the source object.

    Args:
        event: Invocation payload. Must provide
            ``event['detail']['bucket']['name']`` and
            ``event['detail']['object']['key']``.
            NOTE(review): this looks like an EventBridge S3 notification --
            confirm against the trigger configuration.
        context: Lambda context object (unused).

    Returns:
        A dict with ``statusCode`` 200 and a ``body.message`` mapping that
        describes the stored analysis (ticker, doc type, file id, file name,
        object key and bucket name).

    Raises:
        KeyError: If the event lacks the expected ``detail`` fields.
        ValueError: If the object key does not follow the expected naming
            layout. (Previously this surfaced as an ``AttributeError`` from
            calling ``.group`` on a failed regex match.)
    """
    event_detail = event['detail']
    bucket_name = event_detail['bucket']['name']
    object_key = event_detail['object']['key']

    # Validate the key before calling any AWS service so that a badly named
    # upload fails fast.
    company_ticker, doc_type = _parse_object_key(object_key)
    file_id = str(uuid.uuid4())

    data_dict = textract_client.analyze_document(
        Document={'S3Object': {'Bucket': bucket_name, 'Name': object_key}},
        FeatureTypes=['TABLES'],
    )

    # default=str stringifies any value in the response that json cannot
    # serialize natively.
    data_string = json.dumps(data_dict, indent=2, default=str)
    filename = f'{company_ticker}_{doc_type}_{file_id}.json'
    analyzed_key = f'analyzed/{filename}'

    s3_client.put_object(
        Bucket=bucket_name,
        Key=analyzed_key,
        Body=data_string,
    )

    # Remove the source only after the analysis has been stored.
    s3_client.delete_object(
        Bucket=bucket_name,
        Key=object_key,
    )

    return {
        "statusCode": 200,
        "body": {
            "message": {
                "companyTicker": company_ticker,
                "docType": doc_type,
                "fileId": file_id,
                "fileName": filename,
                "objectKey": analyzed_key,
                "bucketName": bucket_name,
            }
        },
    }