Mercurial > public > finance-parser
view upload_document/app.py @ 10:2350662483a3
fix minor bugs
author    Dennis C. M. <dennis@denniscm.com>
date      Thu, 08 Jun 2023 17:16:36 +0100
parents   d15ccf5f1373
children  d09dee7a86da
line source
import json

import boto3

s3_client = boto3.client('s3')
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('FinanceParser')


def lambda_handler(event, context):
    event_msg = event['body']['message']

    # Download file from S3
    s3_client.download_file(
        event_msg['bucketName'],
        event_msg['objectKey'],
        '/tmp/document.json'
    )

    with open('/tmp/document.json') as f:
        doc = json.load(f)

    for dateColumn, date in doc['dateColumns'].items():
        for row_index, account in doc['data'].items():
            try:
                column_types = account['type']
            except KeyError:
                column_types = []

            """
            Given:

            +------------------+------+------+
            | ASSETS           | 2020 | 2019 |
            +------------------+------+------+
            | ASSETS_ACCOUNT_1 |      |      |
            +------------------+------+------+
            | ASSETS_ACCOUNT_2 |      |      |
            +------------------+------+------+

            The following statement avoids getting `2020` as the value
            of `ASSETS`.
            """
            try:
                account_value = account[dateColumn]
            except KeyError:
                account_value = ''

            if 'COLUMN_HEADER' in column_types and date == account_value:
                account_value = ''

            with table.batch_writer() as batch:
                # pk -> item_type#company_ticker
                # sk -> date#row_index
                batch.put_item(
                    Item={
                        'pk': f"balance#{event_msg['companyTicker']}",
                        'sk': f'{date}#{row_index}',
                        'account_name': account['1'],
                        'account_value': account_value,
                        'column_types': column_types,
                        'format': doc['format']
                    }
                )

        # pk -> item_type#company_ticker
        # sk -> date#filename
        table.put_item(
            Item={
                'pk': f"file#balance#{event_msg['companyTicker']}",
                'sk': f"{date}#{event_msg['objectKey'].replace('processed/', '')}"
            }
        )

    return {
        "statusCode": 200,
        "body": json.dumps({
            "message": "ok"
        }),
    }
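For reference, a minimal sketch of the event shape the handler expects, inferred from the key accesses above (event['body']['message'] with bucketName, objectKey and companyTicker). The bucket name, object key and ticker below are placeholder values, not taken from the repository:

# Hypothetical local invocation; values are illustrative only.
sample_event = {
    'body': {
        'message': {
            'bucketName': 'finance-parser-documents',    # assumed bucket name
            'objectKey': 'processed/aapl_balance.json',   # assumed object key
            'companyTicker': 'AAPL'                       # assumed ticker
        }
    }
}

# lambda_handler(sample_event, None)
# Running this requires AWS credentials, access to the referenced S3 object,
# and an existing DynamoDB table named 'FinanceParser'.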