Обновить dags/split_subkonto2.py
This commit is contained in:
parent
68f244a5fb
commit
2090c7e025
|
|
@ -209,10 +209,16 @@ def extract_title(s, number_span=None, date_span=None):
|
|||
return None
|
||||
|
||||
def parse_contract_cell(cell_text):
|
||||
s = cell_text.strip()
|
||||
if not s:
|
||||
return {"title": None, "number": None, "date_raw": None, "date_norm": None}
|
||||
if pd.isna(cell_text) or not str(cell_text).strip():
|
||||
return pd.Series({
|
||||
"subkonto2": None,
|
||||
"naimenovanie": None,
|
||||
"nomer": None,
|
||||
"date_bedin": None,
|
||||
"date_end": None
|
||||
})
|
||||
|
||||
s = str(cell_text).strip()
|
||||
s_norm_spaces = normalize_spaces(s)
|
||||
|
||||
# Поиск даты
|
||||
|
|
@ -228,12 +234,13 @@ def parse_contract_cell(cell_text):
|
|||
if title and '№' in title:
|
||||
title = re.sub(r'№\s*', '', title).strip()
|
||||
|
||||
return {
|
||||
"title": title,
|
||||
"number": number,
|
||||
"date_raw": date_raw,
|
||||
"date_norm": date_norm
|
||||
}
|
||||
return pd.Series({
|
||||
"subkonto2": cell_text,
|
||||
"naimenovanie": title,
|
||||
"nomer": number,
|
||||
"date_bedin": date_norm,
|
||||
"date_end": None
|
||||
})
|
||||
|
||||
def read_dict_subkonto2_db(**kwargs):
|
||||
df = pd.read_sql("""
|
||||
|
|
@ -246,7 +253,8 @@ def read_dict_subkonto2_db(**kwargs):
|
|||
from public.oborotno_salbdovaya_vedomostb osv
|
||||
left join public.dict_subkonto_two as dst
|
||||
on dst.subkonto2 = osv.subkonto2
|
||||
where osv.subkonto2 not in (select dst2.subkonto2 from dict_subkonto_two dst2)""")
|
||||
where osv.subkonto2 not in (select dst2.subkonto2 from dict_subkonto_two dst2)
|
||||
""")
|
||||
return df.to_dict(orient='records')
|
||||
|
||||
def split_subkonto_from_1C(**kwargs):
|
||||
|
|
@ -254,19 +262,27 @@ def split_subkonto_from_1C(**kwargs):
|
|||
select distinct
|
||||
osv.subkonto2 as subkonto2
|
||||
from public.oborotno_salbdovaya_vedomostb osv
|
||||
where osv.subkonto2 not in (select dst2.subkonto2 from dict_subkonto_two dst2)
|
||||
""")
|
||||
|
||||
return df.to_dict(orient='records')
|
||||
result_df = df['column_name'].apply(parse_contract_cell)
|
||||
return result_df.to_dict(orient='records')
|
||||
|
||||
def merge_dict_and_split_1C(**kwargs):
    """Left-join the dictionary rows onto the split Subkonto2 rows.

    Pulls the XCom results of the ``read_dict_subkonto2_db`` and
    ``split_subkonto_from_1C`` tasks via ``kwargs['ti']`` and merges them
    on the ``subkonto2`` column, keeping every split row.

    Args:
        **kwargs: task context; must contain ``ti``, an object exposing
            ``xcom_pull(task_ids=...)``.

    Returns:
        list[dict]: one record per merged row. An empty list when the
        split task produced nothing; the split rows unchanged when the
        dictionary task produced nothing.
    """
    ti = kwargs['ti']
    dict_subkonto = ti.xcom_pull(task_ids='read_dict_subkonto2_db')
    split_subkonto = ti.xcom_pull(task_ids='split_subkonto_from_1C')

    # An empty payload would become a column-less DataFrame, and merging
    # on 'subkonto2' would then raise KeyError. Handle both empty sides
    # explicitly instead.
    if not split_subkonto:
        return []

    df_split_subkonto = pd.DataFrame.from_records(split_subkonto)
    if not dict_subkonto:
        # Nothing to join against: a left join leaves the split rows as-is.
        return df_split_subkonto.to_dict(orient='records')

    df_dict_subkonto = pd.DataFrame.from_records(dict_subkonto)
    df_subkonto = df_split_subkonto.merge(
        df_dict_subkonto, how='left', on='subkonto2'
    )
    return df_subkonto.to_dict(orient='records')
|
||||
|
||||
with DAG(
|
||||
dag_id='split_subkonto2',
|
||||
default_args=default_args,
|
||||
description='Разделение Субконто2 на наименование, номер, дату начала и дату окончания договора',
|
||||
description='Разделение Субконто2 на наименование, номер, дату начала и дату окончания договора. C последующим мэппингом справочника.',
|
||||
schedule_interval=None,
|
||||
catchup=False,
|
||||
tags=['sigma'],
|
||||
|
|
|
|||
Loading…
Reference in New Issue