DAG: hardness_clustering_bhp_spence

schedule: 50 16,0 * * *


hardness_clustering_bhp_spence

Toggle wrap
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Airflow dependencies
from asyncio import Task
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago
from airflow.models import Variable

from jinja2 import Template
from functools import reduce
from datetime import datetime, timedelta
from standardized_process.dags.functions.hardness_clustering_process import *

import json 
import os 

# Module-level flag; it is only assigned here and is not read anywhere in the
# visible part of this file.
task_loaded = False

# Default arguments handed to the DAG constructor as `default_args`
default_args = {
    'owner': 'pedro',
    'start_date': datetime(2023, 4, 22, 0, 0, 0)
}

# Take a single clock reading so that year, month and day always describe the
# same calendar date; three separate datetime.now() calls could straddle
# midnight and produce a mixed date.
# NOTE: this is evaluated every time the module is imported.
_now = datetime.now()
year = _now.year
month = _now.month
day = _now.day

# Site-specific values passed to the configuration template when it is rendered
mine_site = "BHP Spence"
drilling_source = "Surface Manager"

# Read the JSON configuration file; its text is treated as a Jinja template
with open('dags/standardized_process/dags/configuration/hardness_clustering_dag.json') as config_file:
    template_content = config_file.read()

# Values substituted into the template: site identifiers plus the current date
render_context = {
    'mine_site': mine_site,
    'drilling_source': drilling_source,
    'year': year,
    'month': month,
    'day': day,
}

# Build the Jinja2 template, render it, and parse the rendered text as JSON
template = Template(template_content)
rendered_config = template.render(**render_context)
data = json.loads(rendered_config)

# Lookup table from the function names used in the configuration's task
# entries to the callables that implement them (the callables are presumably
# provided by the hardness_clustering_process star import -- confirm)
function_mapping = {
    'generate_feature_clustering_model': generate_feature_clustering_model,
    'generate_hardness_label':generate_hardness_label,
    'add_drilling_times': add_drilling_times,
    'update_input_data': update_input_data
}

def chain_tasks(x, y):
    """Combine ``x`` and ``y`` with the ``<<`` operator and return the result.

    The two-argument shape makes it usable as a folding function. For Airflow
    tasks ``x << y`` is expected to declare ``y`` upstream of ``x`` -- confirm
    against the Airflow documentation.
    """
    shifted = x << y
    return shifted

# DAG definition: cron-scheduled, limited to one active run at a time, and
# created unpaused
dag = DAG(
    "hardness_clustering_bhp_spence",
    schedule_interval='50 16,0 * * *',
    tags=['Standardized Process'],
    default_args=default_args,
    max_active_runs=1,
    is_paused_upon_creation=False,
)

# One PythonOperator per active entry of the configuration's task scheme, kept
# in configuration order. The explicit `== True` is intentional: it accepts
# only True (or a number equal to 1), unlike a plain truthiness test.
list_task = [
    PythonOperator(
        task_id=task_spec['task_id'],
        python_callable=function_mapping[task_spec['function']],
        op_kwargs=task_spec['op_kwargs'],
        retries=3,
        dag=dag,
    )
    for task_spec in data['task_scheme']
    if task_spec['active'] == True  # noqa: E712
]

# Set dependencies: chain the tasks linearly, each one followed by the next
for upstream_task, downstream_task in zip(list_task, list_task[1:]):
    upstream_task >> downstream_task