Compare commits

...

2 Commits

Author SHA1 Message Date
Marcel D
d1d7d1b58d improve neutral map algo 2023-03-30 20:24:03 +02:00
Marcel D
38e4d086cf fix xcom 2023-03-30 16:54:11 +02:00
5 changed files with 123 additions and 12 deletions

View File

@ -1,5 +1,9 @@
# Apache Airflow Investigation # Apache Airflow Investigation
## How to run
In the `airflow` directory:
`docker-compose up -d`
login: login:
airflow:airflow airflow:airflow
@ -22,3 +26,8 @@ https://airflow.apache.org/docs/apache-airflow-providers-openfaas/stable/index.h
https://airflow.apache.org/docs/apache-airflow-providers-cncf-kubernetes/stable/index.html https://airflow.apache.org/docs/apache-airflow-providers-cncf-kubernetes/stable/index.html
Lessons learned
Airflow stack takes A LOT of RAM: ~ 9GB

View File

@ -12,7 +12,7 @@ def extract_draft_data():
return drafts.download_draft_json(marcel_vs_canuck) return drafts.download_draft_json(marcel_vs_canuck)
def transform_draft(ti): def transform_draft(ti):
draft_json=ti.xcom_pull(task_ids=['extract_draft1']) draft_json=ti.xcom_pull(task_ids='extract_draft1', key='return_value')
if not draft_json: if not draft_json:
raise ValueError("No value stored in XComs") raise ValueError("No value stored in XComs")
draft_picks=drafts.get_picked_maps(draft_json) draft_picks=drafts.get_picked_maps(draft_json)
@ -32,13 +32,16 @@ with DAG("aoe_dag", start_date=datetime(2023, 1, 1), schedule_interval="@daily",
#T #T
transform_data = PythonOperator( transform_data = PythonOperator(
task_id="transform_draft1", task_id="transform_draft1",
python_callable=transform_draft python_callable=transform_draft,
do_xcom_push=True
) )
#L #L
string_to_print = "{{ task_instance.xcom_pull(task_ids='transform_draft1', key='return_value') }}"
print_output = BashOperator( print_output = BashOperator(
task_id="print1", task_id="print1",
bash_command="echo 'bla'" bash_command="echo ${STRING_TO_PRINT}",
env={"STRING_TO_PRINT": string_to_print}
) )
extract_data >> transform_data >> print_output extract_data >> transform_data >> print_output

View File

@ -14,10 +14,23 @@ docker run --name jupyter -d --rm \
-e NB_USER="marcelo" \ -e NB_USER="marcelo" \
-e CHOWN_HOME=yes \ -e CHOWN_HOME=yes \
-w "/home/marcelo" \ -w "/home/marcelo" \
-v /mnt/c/Users/dechert/git/aoe2-data/jupyter/src:/home/marcelo/src \ -v /mnt/c/Users/Marcelo/git/aoe2-data/jupyter/src:/home/marcelo/src \
jupyter/scipy-notebook:2023-03-27 jupyter/scipy-notebook:2023-03-27
``` ```
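Username-agnostic command (does not work: `$(echo whoami)` yields the literal string `whoami` instead of running `whoami`, and `${PATH_TO_JUPYTER_FOLDER}` is expanded by the shell before the inline assignment on the same command line takes effect):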
```bash
PATH_TO_JUPYTER_FOLDER="/mnt/c/Users/$(echo whoami)/git/aoe2-data/jupyter/src" docker run --name jupyter -d --rm \
-p 8888:8888 \
--user root \
-e NB_USER="marcelo" \
-e CHOWN_HOME=yes \
-w "/home/marcelo" \
-v ${PATH_TO_JUPYTER_FOLDER}:/home/marcelo/src \
jupyter/scipy-notebook:2023-03-27
```
`docker logs -f jupyter` to see the url with secret `docker logs -f jupyter` to see the url with secret
configuration: configuration:

View File

@ -9,7 +9,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"%load_ext autoreload\n" "%load_ext autoreload"
] ]
}, },
{ {
@ -26,7 +26,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 4,
"id": "a121cdfe-114a-4aae-b107-df891338dfcc", "id": "a121cdfe-114a-4aae-b107-df891338dfcc",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -109,11 +109,97 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 10,
"id": "7b337190-7887-40b7-9d10-b8039f5f2dfd", "id": "7b337190-7887-40b7-9d10-b8039f5f2dfd",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>player</th>\n",
" <th>executingPlayer</th>\n",
" <th>actionType</th>\n",
" <th>chosenOptionId</th>\n",
" <th>isRandomlyChosen</th>\n",
" <th>offset</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>HOST</td>\n",
" <td>HOST</td>\n",
" <td>pick</td>\n",
" <td>Koala</td>\n",
" <td>False</td>\n",
" <td>13973</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>GUEST</td>\n",
" <td>GUEST</td>\n",
" <td>pick</td>\n",
" <td>Nordinseln</td>\n",
" <td>False</td>\n",
" <td>26925</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8</td>\n",
" <td>HOST</td>\n",
" <td>HOST</td>\n",
" <td>pick</td>\n",
" <td>Vier Seen</td>\n",
" <td>False</td>\n",
" <td>54947</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index player executingPlayer actionType chosenOptionId isRandomlyChosen \\\n",
"0 2 HOST HOST pick Koala False \n",
"1 3 GUEST GUEST pick Nordinseln False \n",
"2 8 HOST HOST pick Vier Seen False \n",
"\n",
" offset \n",
"0 13973 \n",
"1 26925 \n",
"2 54947 "
]
},
"execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [], "output_type": "execute_result"
"source": [] }
],
"source": [
"draft_picks.reset_index()"
]
} }
], ],
"metadata": { "metadata": {

View File

@ -23,5 +23,5 @@ def get_picked_maps(draft_json):
# the last pick is the neutral map # the last pick is the neutral map
def neutral_map_as_string(draft_picks): def neutral_map_as_string(draft_picks):
# print(draft_picks.tail(1)['chosenOptionId']) neutral_map = draft_picks.tail(1)['chosenOptionId'].values[0]
return str(draft_picks.tail(1)['chosenOptionId'][8]) return neutral_map