changeset 10:2350662483a3

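fix minor bugs: use non-greedy regex captures for ticker and doc type in analyze_document; fall back to an empty account value when the date column is missing (KeyError) in upload_document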
author Dennis C. M. <dennis@denniscm.com>
date Thu, 08 Jun 2023 17:16:36 +0100
parents bf19235a9636
children d09dee7a86da
files analyze_document/app.py events/analyze_document_event.json upload_document/app.py
diffstat 3 files changed, 8 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/analyze_document/app.py	Wed Jun 07 11:07:14 2023 +0100
+++ b/analyze_document/app.py	Thu Jun 08 17:16:36 2023 +0100
@@ -13,8 +13,8 @@
     bucket_name = event_detail['bucket']['name']
     object_key = event_detail['object']['key']
 
-    company_ticker = re.search('unprocessed/(.*)_', object_key).group(1)
-    doc_type = re.search(f'unprocessed/{company_ticker}_(.*)_', object_key).group(1)
+    company_ticker = re.search('unprocessed/(.*?)_', object_key).group(1)
+    doc_type = re.search(f'unprocessed/{company_ticker}_(.*?)_', object_key).group(1)
     file_id = str(uuid.uuid4())
 
     data_dict = textract_client.analyze_document(
--- a/events/analyze_document_event.json	Wed Jun 07 11:07:14 2023 +0100
+++ b/events/analyze_document_event.json	Thu Jun 08 17:16:36 2023 +0100
@@ -15,7 +15,7 @@
          "name":"sandbox-finance-parser-data"
       },
       "object":{
-         "key":"unprocessed/san_balance.pdf",
+         "key":"unprocessed/san_balance_1.pdf",
          "size":49856,
          "etag":"0adc595c8f2dbfabb5c4095f1f91b458",
          "sequencer":"00647A159E6438B1A6"
--- a/upload_document/app.py	Wed Jun 07 11:07:14 2023 +0100
+++ b/upload_document/app.py	Thu Jun 08 17:16:36 2023 +0100
@@ -41,7 +41,11 @@
             The following statement avoids getting `2020` as the value of `ASSETS`.
             """
 
-            account_value = account[dateColumn]
+            try:
+                account_value = account[dateColumn]
+            except KeyError:
+                account_value = ''
+
             if 'COLUMN_HEADER' in column_types and date == account_value:
                 account_value = ''