Mercurial > public > finance-parser
changeset 10:2350662483a3
fix minor bugs
author | Dennis C. M. <dennis@denniscm.com> |
---|---|
date | Thu, 08 Jun 2023 17:16:36 +0100 |
parents | bf19235a9636 |
children | d09dee7a86da |
files | analyze_document/app.py events/analyze_document_event.json upload_document/app.py |
diffstat | 3 files changed, 8 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/analyze_document/app.py Wed Jun 07 11:07:14 2023 +0100
+++ b/analyze_document/app.py Thu Jun 08 17:16:36 2023 +0100
@@ -13,8 +13,8 @@
 bucket_name = event_detail['bucket']['name']
 object_key = event_detail['object']['key']
- company_ticker = re.search('unprocessed/(.*)_', object_key).group(1)
- doc_type = re.search(f'unprocessed/{company_ticker}_(.*)_', object_key).group(1)
+ company_ticker = re.search('unprocessed/(.*?)_', object_key).group(1)
+ doc_type = re.search(f'unprocessed/{company_ticker}_(.*?)_', object_key).group(1)
 file_id = str(uuid.uuid4())
 data_dict = textract_client.analyze_document(
```
```diff
--- a/events/analyze_document_event.json Wed Jun 07 11:07:14 2023 +0100
+++ b/events/analyze_document_event.json Thu Jun 08 17:16:36 2023 +0100
@@ -15,7 +15,7 @@
 "name":"sandbox-finance-parser-data"
 },
 "object":{
- "key":"unprocessed/san_balance.pdf",
+ "key":"unprocessed/san_balance_1.pdf",
 "size":49856,
 "etag":"0adc595c8f2dbfabb5c4095f1f91b458",
 "sequencer":"00647A159E6438B1A6"
```
```diff
--- a/upload_document/app.py Wed Jun 07 11:07:14 2023 +0100
+++ b/upload_document/app.py Thu Jun 08 17:16:36 2023 +0100
@@ -41,7 +41,11 @@
 The following statement avoids getting `2020` as the value of `ASSETS`.
 """
- account_value = account[dateColumn]
+ try:
+     account_value = account[dateColumn]
+ except KeyError:
+     account_value = ''
+
 if 'COLUMN_HEADER' in column_types and date == account_value:
     account_value = ''
```