Обновить dags/split_subkonto2.py

This commit is contained in:
bn_user 2025-09-08 08:16:30 +00:00
parent 68f244a5fb
commit 2090c7e025
1 changed file with 35 additions and 19 deletions

View File

@ -209,10 +209,16 @@ def extract_title(s, number_span=None, date_span=None):
return None
def parse_contract_cell(cell_text):
s = cell_text.strip()
if not s:
return {"title": None, "number": None, "date_raw": None, "date_norm": None}
if pd.isna(cell_text) or not str(cell_text).strip():
return pd.Series({
"subkonto2": None,
"naimenovanie": None,
"nomer": None,
"date_bedin": None,
"date_end": None
})
s = str(cell_text).strip()
s_norm_spaces = normalize_spaces(s)
# Поиск даты
@ -228,12 +234,13 @@ def parse_contract_cell(cell_text):
if title and '' in title:
title = re.sub(r'\s*', '', title).strip()
return {
"title": title,
"number": number,
"date_raw": date_raw,
"date_norm": date_norm
}
return pd.Series({
"subkonto2": cell_text,
"naimenovanie": title,
"nomer": number,
"date_bedin": date_norm,
"date_end": None
})
def read_dict_subkonto2_db(**kwargs):
df = pd.read_sql("""
@ -246,7 +253,8 @@ def read_dict_subkonto2_db(**kwargs):
from public.oborotno_salbdovaya_vedomostb osv
left join public.dict_subkonto_two as dst
on dst.subkonto2 = osv.subkonto2
where osv.subkonto2 not in (select dst2.subkonto2 from dict_subkonto_two dst2)""")
where osv.subkonto2 not in (select dst2.subkonto2 from dict_subkonto_two dst2)
""")
return df.to_dict(orient='records')
def split_subkonto_from_1C(**kwargs):
@ -254,19 +262,27 @@ def split_subkonto_from_1C(**kwargs):
select distinct
osv.subkonto2 as subkonto2
from public.oborotno_salbdovaya_vedomostb osv
where osv.subkonto2 not in (select dst2.subkonto2 from dict_subkonto_two dst2)
""")
return df.to_dict(orient='records')
result_df = df['column_name'].apply(parse_contract_cell)
return result_df.to_dict(orient='records')
def merge_dict_and_split_1C(**kwargs):
    """Left-join the split subkonto2 rows with the dictionary rows.

    Pulls two XCom payloads through ``kwargs['ti']``: the result of task
    ``read_dict_subkonto2_db`` and the result of task
    ``split_subkonto_from_1C``. Both are expected to be lists of record
    dicts; they are joined on the ``subkonto2`` column, keeping every row
    of the split result.

    Args:
        **kwargs: Task context; must contain ``ti``, an object exposing
            ``xcom_pull(task_ids=...)``.

    Returns:
        A list of record dicts (``DataFrame.to_dict(orient='records')``).
        An empty list when the split payload is empty or missing; the
        split records unchanged when the dictionary payload is empty or
        missing.
    """
    ti = kwargs['ti']
    dict_subkonto = ti.xcom_pull(task_ids='read_dict_subkonto2_db')
    split_subkonto = ti.xcom_pull(task_ids='split_subkonto_from_1C')

    # Nothing on the left side of the join means nothing to return. Merging a
    # column-less DataFrame on 'subkonto2' would raise KeyError instead.
    if not split_subkonto:
        return []

    df_split_subkonto = pd.DataFrame.from_records(split_subkonto)

    # An empty dictionary payload has no 'subkonto2' column to join on, so
    # the left join degenerates to the split rows themselves.
    if not dict_subkonto:
        return df_split_subkonto.to_dict(orient='records')

    df_dict_subkonto = pd.DataFrame.from_records(dict_subkonto)
    df_subkonto = df_split_subkonto.merge(df_dict_subkonto, how='left', on='subkonto2')
    return df_subkonto.to_dict(orient='records')
with DAG(
dag_id='split_subkonto2',
default_args=default_args,
description='Разделение Субконто2 на наименование, номер, дату начала и дату окончания договора',
description='Разделение Субконто2 на наименование, номер, дату начала и дату окончания договора. C последующим мэппингом справочника.',
schedule_interval=None,
catchup=False,
tags=['sigma'],