Mercurial > public > finance-parser
comparison upload_document/app.py @ 3:2e5f3664f3e4
documents analyzer almost finished
author | Dennis C. M. <dennis@denniscm.com> |
---|---|
date | Fri, 02 Jun 2023 20:12:29 +0100 |
parents | |
children | 9005b7590008 |
comparison
equal
deleted
inserted
replaced
2:ef8a4d95755a | 3:2e5f3664f3e4 |
---|---|
1 import json | |
2 import boto3 | |
3 import re | |
4 | |
# AWS handles are created once at import time rather than on every call.

# DynamoDB table that receives the parsed document rows and file records.
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('FinanceParser')

# S3 client used to download the processed document.
s3_client = boto3.client('s3')
8 | |
9 | |
def lambda_handler(event, context):
    """Load a processed document from S3 and store its rows in DynamoDB.

    The event must provide ``event['body']['message']`` with the keys
    ``objectKey`` and ``bucketName``. The company ticker is extracted from
    the object key: it is the text between ``processed/`` and the last
    underscore that follows it.

    For every entry of ``doc['dateColumns']`` one item per entry of
    ``doc['data']`` is written with ``pk = balance#<ticker>`` and
    ``sk = <date>#<row_index>``. After the rows of a date column have been
    written, one ``file#<ticker>`` item keyed by that date records the
    source filename (the object key without ``processed/``).

    Args:
        event: Invocation payload as described above.
        context: Unused.

    Returns:
        A dict with ``statusCode`` 200 and a JSON-encoded ``body``.

    Raises:
        ValueError: If the object key does not contain
            ``processed/<ticker>_``.
        KeyError: If the event or the downloaded document lacks one of the
            keys read here.
    """
    event_message = event['body']['message']
    object_key = event_message['objectKey']
    bucket_name = event_message['bucketName']

    # Fail with a message that names the offending key instead of an
    # AttributeError on None when the key has an unexpected layout.
    ticker_match = re.search(r'processed/(.*)_', object_key)
    if ticker_match is None:
        raise ValueError(
            f'Cannot extract company ticker from object key: {object_key!r}'
        )
    company_ticker = ticker_match.group(1)

    # Download file from s3
    s3_client.download_file(bucket_name, object_key, '/tmp/document.json')

    with open('/tmp/document.json', encoding='utf-8') as f:
        doc = json.load(f)

    # Same for every date column, so compute it once.
    filename = object_key.replace('processed/', '')

    for date_column, date in doc['dateColumns'].items():

        # One batch writer per date column: the writer buffers the puts and
        # sends them in batches, flushing whatever is left when the `with`
        # block exits. Opening a writer per row would flush after every
        # single item. Sort keys are unique within a date column because
        # row indexes are dict keys.
        with table.batch_writer() as batch:
            for row_index, account in doc['data'].items():
                column_types = account.get('type', [])

                # The following statement avoids getting a `2020` as the
                # value of `ASSETS`.
                #
                # +------------------+------+------+
                # | ASSETS           | 2020 | 2019 |
                # +------------------+------+------+
                # | ASSETS_ACCOUNT_1 |      |      |
                # +------------------+------+------+
                # | ASSETS_ACCOUNT_2 |      |      |
                # +------------------+------+------+
                account_value = account[date_column]
                if 'COLUMN_HEADER' in column_types and date == account_value:
                    account_value = ''

                # pk -> item_type#company_ticker
                # sk -> date#row_index
                batch.put_item(
                    Item={
                        'pk': f'balance#{company_ticker}',
                        'sk': f'{date}#{row_index}',
                        'account_name': account['1'],
                        'account_value': account_value,
                        'column_types': column_types,
                    }
                )

        # pk -> item_type#company_ticker
        # sk -> date
        # Written once per date column, after that date's rows were flushed.
        table.put_item(
            Item={
                'pk': f'file#{company_ticker}',
                'sk': f"{date}",
                'filename': filename,
            }
        )

    return {
        "statusCode": 200,
        "body": json.dumps({
            "message": "ok"
        }),
    }