comparison upload_document/app.py @ 3:2e5f3664f3e4

documents analyzer almost finished
author Dennis C. M. <dennis@denniscm.com>
date Fri, 02 Jun 2023 20:12:29 +0100
parents
children 9005b7590008
comparison
equal deleted inserted replaced
2:ef8a4d95755a 3:2e5f3664f3e4
1 import json
2 import boto3
3 import re
4
# AWS handles are created once at import time, so lambda_handler reuses the
# same objects on every call made against this loaded module.
s3_client = boto3.client('s3')
dynamodb = boto3.resource('dynamodb')
# DynamoDB table that lambda_handler writes its items to.
table = dynamodb.Table('FinanceParser')
8
9
def lambda_handler(event, context):
    """Load a processed document from S3 and store its rows in DynamoDB.

    Reads ``objectKey`` and ``bucketName`` from ``event['body']['message']``,
    downloads that object to ``/tmp/document.json`` and, for every entry of
    the document's ``dateColumns``, writes one ``balance#<ticker>`` item per
    row of ``data`` plus one ``file#<ticker>`` item for that date.

    Args:
        event: Invocation payload; ``event['body']['message']`` must be a
            mapping containing ``objectKey`` and ``bucketName``.
        context: Lambda context object (unused).

    Returns:
        A dict with ``statusCode`` 200 and a JSON-encoded ``body``.

    Raises:
        ValueError: If ``objectKey`` does not match ``processed/<ticker>_``.
        KeyError: If the event or the document lacks an expected key.
    """
    event_message = event['body']['message']
    object_key = event_message['objectKey']
    bucket_name = event_message['bucketName']

    # The ticker is everything between `processed/` and the last `_`.
    ticker_match = re.search(r'processed/(.*)_', object_key)
    if ticker_match is None:
        raise ValueError(
            f'Cannot extract company ticker from object key: {object_key!r}'
        )
    company_ticker = ticker_match.group(1)

    # Download file from s3
    s3_client.download_file(bucket_name, object_key, '/tmp/document.json')

    with open('/tmp/document.json', encoding='utf-8') as f:
        doc = json.load(f)

    # Open the batch writer once for the whole document so puts are buffered
    # and sent in batches instead of being flushed after every single row.
    # `overwrite_by_pkeys` keeps last-write-wins behaviour if a key repeats.
    with table.batch_writer(overwrite_by_pkeys=['pk', 'sk']) as batch:
        for date_column, date in doc['dateColumns'].items():
            for row_index, account in doc['data'].items():
                column_types = account.get('type', [])

                # The following statement avoids getting a `2020` as the
                # value of `ASSETS`.
                #
                # +------------------+------+------+
                # | ASSETS           | 2020 | 2019 |
                # +------------------+------+------+
                # | ASSETS_ACCOUNT_1 |      |      |
                # +------------------+------+------+
                # | ASSETS_ACCOUNT_2 |      |      |
                # +------------------+------+------+
                account_value = account[date_column]
                if 'COLUMN_HEADER' in column_types and date == account_value:
                    account_value = ''

                # pk -> item_type#company_ticker
                # sk -> date#row_index
                batch.put_item(
                    Item={
                        'pk': f'balance#{company_ticker}',
                        'sk': f'{date}#{row_index}',
                        'account_name': account['1'],
                        'account_value': account_value,
                        'column_types': column_types,
                    }
                )

            # pk -> item_type#company_ticker
            # sk -> date
            batch.put_item(
                Item={
                    'pk': f'file#{company_ticker}',
                    'sk': f"{date}",
                    'filename': object_key.replace('processed/', ''),
                }
            )

    return {
        "statusCode": 200,
        "body": json.dumps({
            "message": "ok"
        }),
    }