prevent timeout fetch daily maximo

main
MrWaradana 9 months ago
parent 690751afff
commit a7dbfede7d

@@ -3,16 +3,51 @@ import requests
 import json
 import uuid
 import logging
+import time
 from airflow import DAG
-from airflow.operators.python import PythonOperator
+from airflow.operators.python import PythonOperator, BranchPythonOperator
+from airflow.operators.trigger_dagrun import TriggerDagRunOperator
+from airflow.operators.dummy import DummyOperator
 from airflow.utils.dates import days_ago
+from airflow.models import Variable, DagRun
+from airflow.utils.session import create_session
+from airflow.utils.state import State

 # Configure logging
 logger = logging.getLogger(__name__)

-def fetch_maximo_data(**context):
+
+def is_dag_already_running(**context):
+    """
+    Check if there are other active DAG runs to prevent duplicate execution
+    """
+    dag_id = context["dag"].dag_id
+    run_id = context["run_id"]
+
+    with create_session() as session:
+        # Count running instances of this DAG excluding the current run
+        running_dags = (
+            session.query(DagRun)
+            .filter(
+                DagRun.dag_id == dag_id,
+                DagRun.run_id != run_id,
+                DagRun.state == State.RUNNING,
+            )
+            .count()
+        )
+
+    if running_dags > 0:
+        logger.info(
+            f"Found {running_dags} other running instance(s) of this DAG. Skipping execution."
+        )
+        return "skip_execution"
+    else:
+        logger.info("No other running instances found. Proceeding with execution.")
+        return "fetch_daily_maximo_data"
+
+
+def fetch_daily_maximo_data(**context):
     """
     Fetch data from the Maximo endpoint using GET method and process the response
     """
@@ -38,7 +73,7 @@ def fetch_maximo_data(**context):
     response = requests.get(
         url=fetch_url,
         headers=headers,
-        timeout=420,  # Increased timeout to 7 minutes (420 seconds)
+        timeout=60,  # Reduced timeout to 60 seconds
     )

     logger.info(
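Not part of the diff: with `timeout=60`, `requests.get` raises `requests.exceptions.Timeout` after 60 seconds instead of holding a worker slot for up to 7 minutes. A minimal sketch, assuming the failed attempt is then re-run by the task-level retry policy in `default_args` (`fetch_url` and `headers` are placeholders):

```python
# Sketch only: fail fast on a slow Maximo endpoint and let Airflow retries re-run the task.
import requests


def fetch_once(fetch_url, headers):
    try:
        response = requests.get(url=fetch_url, headers=headers, timeout=60)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.Timeout:
        # Re-raise so the task fails quickly and the retry policy takes over.
        raise
```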
@@ -90,6 +125,24 @@ def fetch_maximo_data(**context):
     }

+
+def process_response(**context):
+    """
+    Process the response from the Maximo API
+    """
+    ti = context["ti"]
+    result = ti.xcom_pull(task_ids="fetch_daily_maximo_data", key="fetch_result")
+
+    if result:
+        logger.info(f"Processing result: {result}")
+        # Add any additional processing logic here
+        # Sleep for a short time to simulate processing
+        time.sleep(2)
+        return True
+    return False
+
+
 # Define default arguments for the DAG
 default_args = {
     "owner": "airflow",
@@ -108,14 +161,40 @@ dag = DAG(
     # Schedule to run daily at 21:00, 22:00, and 23:00
     schedule_interval="0 21-23 * * *",
     start_date=days_ago(1),
-    tags=["maximo", "api", "fetch"],
+    tags=["maximo", "api", "fetch", "continuous", "daily"],
     catchup=False,
 )

+# Branch operator to check if we should proceed or skip
+check_running = BranchPythonOperator(
+    task_id="check_if_already_running",
+    python_callable=is_dag_already_running,
+    provide_context=True,
+    dag=dag,
+)
+
+# Skip execution dummy task
+skip_execution = DummyOperator(
+    task_id="skip_execution",
+    dag=dag,
+)
+
 # Define the task to fetch data from Maximo using PythonOperator
 fetch_task = PythonOperator(
-    task_id="fetch_maximo_data",
-    python_callable=fetch_maximo_data,
+    task_id="fetch_daily_maximo_data",
+    python_callable=fetch_daily_maximo_data,
     provide_context=True,
     dag=dag,
 )
+
+# Define task to process the response
+process_task = PythonOperator(
+    task_id="process_response",
+    python_callable=process_response,
+    provide_context=True,
+    dag=dag,
+)
+
+# Set task dependencies
+check_running >> [skip_execution, fetch_task]
+fetch_task >> process_task
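Not part of the diff: with this wiring, whichever task_id `is_dag_already_running` returns is the branch that runs, and the other branch (plus anything downstream of it) is skipped, so at most one of the hourly runs does the fetch. A quick sanity check of the graph from a Python shell, assuming the DAG file is importable as `maximo_fetch_dag` (the module name is a guess):

```python
# Sketch only: verify the branch fan-out and the fetch -> process edge.
from maximo_fetch_dag import dag  # module name is a guess

assert dag.get_task("check_if_already_running").downstream_task_ids == {
    "skip_execution",
    "fetch_daily_maximo_data",
}
assert "process_response" in dag.get_task("fetch_daily_maximo_data").downstream_task_ids
```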
