finance-parser: upload_document/app.py @ 3:2e5f3664f3e4

description: documents analyzer almost finished
author:      Dennis C. M. <dennis@denniscm.com>
date:        Fri, 02 Jun 2023 20:12:29 +0100
children:    9005b7590008
import json
import re

import boto3

s3_client = boto3.client('s3')
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('FinanceParser')


def lambda_handler(event, context):
    event_message = event['body']['message']
    object_key = event_message['objectKey']
    bucket_name = event_message['bucketName']

    # The ticker is embedded in the object key: 'processed/<ticker>_...'
    company_ticker = re.search(r'processed/(.*)_', object_key).group(1)

    # Download the analyzed document from S3
    s3_client.download_file(bucket_name, object_key, '/tmp/document.json')

    with open('/tmp/document.json') as f:
        doc = json.load(f)

    for date_column, date in doc['dateColumns'].items():
        # Write all account rows for this date column in a single batch.
        # pk -> item_type#company_ticker
        # sk -> date#row_index
        with table.batch_writer() as batch:
            for row_index, account in doc['data'].items():
                column_types = account.get('type', [])

                # The following check avoids storing `2020` as the value
                # of `ASSETS` when the row is a column header:
                #
                # +------------------+------+------+
                # | ASSETS           | 2020 | 2019 |
                # +------------------+------+------+
                # | ASSETS_ACCOUNT_1 |      |      |
                # +------------------+------+------+
                # | ASSETS_ACCOUNT_2 |      |      |
                # +------------------+------+------+
                account_value = account[date_column]
                if 'COLUMN_HEADER' in column_types and date == account_value:
                    account_value = ''

                batch.put_item(
                    Item={
                        'pk': f'balance#{company_ticker}',
                        'sk': f'{date}#{row_index}',
                        'account_name': account['1'],
                        'account_value': account_value,
                        'column_types': column_types,
                    }
                )

        # One item per source file and date.
        # pk -> item_type#company_ticker
        # sk -> date
        table.put_item(
            Item={
                'pk': f'file#{company_ticker}',
                'sk': f'{date}',
                'filename': object_key.replace('processed/', ''),
            }
        )

    return {
        "statusCode": 200,
        "body": json.dumps({"message": "ok"}),
    }
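For reference, a minimal sketch of the inputs the handler expects, inferred from the lookups above. Every concrete value here (bucket, key, ticker, dates, figures) is a made-up placeholder, not something taken from the repository.

# Hypothetical invocation event: the ticker is parsed from the object key,
# so keys must match the pattern 'processed/<ticker>_...'.
sample_event = {
    'body': {
        'message': {
            'objectKey': 'processed/acme_2020.json',
            'bucketName': 'finance-parser-bucket',
        }
    }
}

# Hypothetical layout of document.json: 'dateColumns' maps a column id to a
# date, and 'data' maps a row index to that row's cells keyed by column id
# (column '1' holds the account name).
sample_document = {
    'dateColumns': {'2': '2020', '3': '2019'},
    'data': {
        '0': {'1': 'ASSETS', '2': '2020', '3': '2019', 'type': ['COLUMN_HEADER']},
        '1': {'1': 'ASSETS_ACCOUNT_1', '2': '1500', '3': '1200'},
    },
}

With this key schema, all balance rows for one company and year can be read back in a single DynamoDB query; a sketch, again with a placeholder ticker:

from boto3.dynamodb.conditions import Key

# Fetch every 'balance' row for the company whose sort key starts with '2020#'.
response = table.query(
    KeyConditionExpression=Key('pk').eq('balance#acme')
    & Key('sk').begins_with('2020#')
)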