This commit is contained in:
dechert 2023-03-30 15:36:02 +02:00
parent add7a566d5
commit 4332a35f4a
20 changed files with 649 additions and 368 deletions

8
.gitignore vendored

@ -1,3 +1,11 @@
# Airflow stuff
logs/
airflow.db
airflow.cfg
unittests.cfg
webserver_config.py
# ---> JupyterNotebooks
# gitignore template for Jupyter Notebooks
# website: http://jupyter.org/

@ -48,8 +48,6 @@ https://aoe2cm.net/draft/FMHZx
Airflow:
Steps:
- input aoecm link
@ -57,38 +55,6 @@ Steps:
- run python script to process data
## Jupyter notebook in docker:
There are different images to choose from: https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html
docker run -it -p 8888:8888 jupyter/scipy-notebook:2023-03-27
Works only on WSL because of mounted host volume
run detached:
docker run --name jupyter -d \
-p 8888:8888 \
--user root \
-e NB_USER="marcelo" \
-e CHOWN_HOME=yes \
-w "/home/marcelo" \
-v ./jupyter-data:/home/marcelo \
jupyter/scipy-notebook:2023-03-27
configuration:
https://jupyter-docker-stacks.readthedocs.io/en/latest/using/common.html
How to install new packages:
https://jupyter-docker-stacks.readthedocs.io/en/latest/using/common.html#conda-environments
run inside container:
pip install liquipediapy
docker cp jupyter:/home/marcelo/AoE-data.ipynb .
Actually, using the API of the official site is straightforward.
For example, a POST request to https://api.ageofempires.com/api/v2/AgeII/GetMPFull with the JSON body {"profileId": "199325", "matchType": "3"} and the header Content-Type: application/json returns JSON-encoded 1v1 Random Map stats for Hera. matchType selects the leaderboard and is one of: Unranked=0, 1v1 Deathmatch=1, Team Deathmatch=2, 1v1 Random Map=3, Team Random Map=4.
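A minimal sketch of that call with Python's `requests` (profile ID and matchType are the example values above; the response schema is not documented here, so the sketch just prints the raw JSON):
```python
import requests

# Fetch 1v1 Random Map stats from the official AoE2 API.
# profileId "199325" (Hera) and matchType "3" (1v1 RM) are the example values above.
response = requests.post(
    "https://api.ageofempires.com/api/v2/AgeII/GetMPFull",
    json={"profileId": "199325", "matchType": "3"},  # json= sets Content-Type: application/json
    timeout=10,
)
response.raise_for_status()
print(response.json())
```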

@ -1,2 +0,0 @@
# aoe2-data

@ -0,0 +1,46 @@
{
"info": {
"_postman_id": "4bef20ab-fcaf-4d6b-b73c-ee856fd6e147",
"name": "Airflow",
"schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json",
"_exporter_id": "7683507"
},
"item": [
{
"name": "get all DAGs",
"request": {
"auth": {
"type": "basic",
"basic": [
{
"key": "password",
"value": "airflow",
"type": "string"
},
{
"key": "username",
"value": "airflow",
"type": "string"
}
]
},
"method": "GET",
"header": [],
"url": {
"raw": "http://localhost:8080/api/v1/dags",
"protocol": "http",
"host": [
"localhost"
],
"port": "8080",
"path": [
"api",
"v1",
"dags"
]
}
},
"response": []
}
]
}
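The same request outside Postman, as a small Python sketch against the Airflow REST API (uses the default `airflow:airflow` basic-auth credentials from this setup and assumes the webserver is reachable on localhost:8080):
```python
import requests

# List all DAGs via the Airflow REST API (basic auth is enabled in this docker-compose setup).
response = requests.get(
    "http://localhost:8080/api/v1/dags",
    auth=("airflow", "airflow"),
    timeout=10,
)
response.raise_for_status()
for dag in response.json()["dags"]:
    print(dag["dag_id"], "paused:", dag["is_paused"])
```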

24
airflow/Readme.md Normal file

@ -0,0 +1,24 @@
# Apache Airflow Investigation
Login: `airflow:airflow`
Questions:
- Are there predefined modules, similar to NiFi processors?
XCom = cross-communication between the tasks of a DAG
`ti` = task instance (the handle a task callable uses to push and pull XComs)
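A minimal sketch of that XCom flow between two PythonOperator tasks (illustration only; the dag id `xcom_demo` is made up for the sketch, and `aoe_dag` in this repo follows the same pattern):
```python
from datetime import datetime

from airflow import DAG
from airflow.operators.python import PythonOperator


def produce():
    # The return value of a PythonOperator callable is pushed to XCom automatically.
    return "lipeX"


def consume(ti):
    # ti is the TaskInstance Airflow injects into the callable;
    # pulling a single task_id returns that task's pushed value directly.
    print(ti.xcom_pull(task_ids="produce_task"))


with DAG("xcom_demo", start_date=datetime(2023, 1, 1), schedule_interval=None, catchup=False) as dag:
    produce_task = PythonOperator(task_id="produce_task", python_callable=produce)
    consume_task = PythonOperator(task_id="consume_task", python_callable=consume)
    produce_task >> consume_task
```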
Test a single task from the CLI: `airflow tasks test <dag_id> <task_id> <logical_date>`, e.g. `airflow tasks test aoe_dag extract_draft1 2023-01-01`
The webserver runs in Gunicorn.
Redis serves as the Celery message broker (XCom values themselves live in the metadata database).
Provider packages to look at (OpenFaaS, CNCF Kubernetes):
https://airflow.apache.org/docs/apache-airflow-providers-openfaas/stable/index.html
https://airflow.apache.org/docs/apache-airflow-providers-cncf-kubernetes/stable/index.html

@ -0,0 +1,44 @@
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.operators.bash import BashOperator
from datetime import datetime

import drafts


def extract_draft_data():
    marcel_vs_canuck = 'lipeX'
    # The returned dict is pushed to XCom automatically.
    return drafts.download_draft_json(marcel_vs_canuck)


def transform_draft(ti):
    # Pull the dict pushed by extract_draft1. Passing a single task_id (not a list)
    # returns the value itself, so it can go straight into drafts.get_picked_maps().
    draft_json = ti.xcom_pull(task_ids='extract_draft1')
    if not draft_json:
        raise ValueError("No value stored in XComs")
    draft_picks = drafts.get_picked_maps(draft_json)
    return drafts.neutral_map_as_string(draft_picks)


# catchup=False: avoid back-filling non-triggered dag-runs for every interval since start_date
with DAG("aoe_dag", start_date=datetime(2023, 1, 1), schedule_interval="@daily", catchup=False) as dag:
    # E
    extract_data = PythonOperator(
        task_id="extract_draft1",
        python_callable=extract_draft_data,
        do_xcom_push=True
    )
    # T
    transform_data = PythonOperator(
        task_id="transform_draft1",
        python_callable=transform_draft
    )
    # L
    print_output = BashOperator(
        task_id="print1",
        bash_command="echo 'bla'"
    )

    extract_data >> transform_data >> print_output

27
airflow/dags/drafts.py Normal file

@ -0,0 +1,27 @@
import requests
import json

import pandas as pd


def download_draft_json(draft_id):
    url = "https://aoe2cm.net/api/draft/{}".format(draft_id)
    response = requests.get(url)
    # The endpoint returns the full draft as a JSON document; parse it into a dict.
    draft_dict = json.loads(response.text)
    return draft_dict


def get_picked_maps(draft_json):
    draft_events = draft_json['events']
    draft_events_df = pd.DataFrame.from_dict(draft_events)
    # Keep only the "pick" events; everything else (bans etc.) is dropped.
    draft_picks = draft_events_df[draft_events_df['actionType'] == 'pick']
    return draft_picks


# the last pick is the neutral map
def neutral_map_as_string(draft_picks):
    # Select by position rather than by the original row label, so this also
    # works for drafts where the last pick does not happen to sit at index 8.
    return str(draft_picks['chosenOptionId'].iloc[-1])

298
airflow/docker-compose.yaml Normal file

@ -0,0 +1,298 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL.
#
# WARNING: This configuration is for local development. Do not use it in a production deployment.
#
# This configuration supports basic configuration using environment variables or an .env file
# The following variables are supported:
#
# AIRFLOW_IMAGE_NAME - Docker image name used to run Airflow.
# Default: apache/airflow:2.5.2
# AIRFLOW_UID - User ID in Airflow containers
# Default: 50000
# AIRFLOW_PROJ_DIR - Base path to which all the files will be volumed.
# Default: .
# Those configurations are useful mostly in case of standalone testing/running Airflow in test/try-out mode
#
# _AIRFLOW_WWW_USER_USERNAME - Username for the administrator account (if requested).
# Default: airflow
# _AIRFLOW_WWW_USER_PASSWORD - Password for the administrator account (if requested).
# Default: airflow
# _PIP_ADDITIONAL_REQUIREMENTS - Additional PIP requirements to add when starting all containers.
# Use this option ONLY for quick checks. Installing requirements at container
# startup is done EVERY TIME the service is started.
# A better way is to build a custom image or extend the official image
# as described in https://airflow.apache.org/docs/docker-stack/build.html.
# Default: ''
#
# Feel free to modify this file to suit your needs.
---
version: '3.8'
x-airflow-common:
&airflow-common
# In order to add custom dependencies or upgrade provider packages you can use your extended image.
# Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml
# and uncomment the "build" line below, Then run `docker-compose build` to build the images.
image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.5.2}
# build: .
environment:
&airflow-common-env
AIRFLOW__CORE__EXECUTOR: CeleryExecutor
AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
# For backward compatibility, with Airflow <2.3
AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow
AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0
AIRFLOW__CORE__FERNET_KEY: ''
AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
AIRFLOW__CORE__LOAD_EXAMPLES: 'true'
AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session'
# yamllint disable rule:line-length
# Use simple http server on scheduler for health checks
# See https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/logging-monitoring/check-health.html#scheduler-health-check-server
# yamllint enable rule:line-length
AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true'
# WARNING: Use _PIP_ADDITIONAL_REQUIREMENTS option ONLY for a quick checks
# for other purpose (development, test and especially production usage) build/extend Airflow image.
_PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
volumes:
- ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags
- ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs
- ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins
user: "${AIRFLOW_UID:-50000}:0"
depends_on:
&airflow-common-depends-on
redis:
condition: service_healthy
postgres:
condition: service_healthy
services:
postgres:
image: postgres:13
environment:
POSTGRES_USER: airflow
POSTGRES_PASSWORD: airflow
POSTGRES_DB: airflow
volumes:
- postgres-db-volume:/var/lib/postgresql/data
healthcheck:
test: ["CMD", "pg_isready", "-U", "airflow"]
interval: 10s
retries: 5
start_period: 5s
restart: always
redis:
image: redis:latest
expose:
- 6379
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 10s
timeout: 30s
retries: 50
start_period: 30s
restart: always
airflow-webserver:
<<: *airflow-common
command: webserver
ports:
- "8080:8080"
healthcheck:
test: ["CMD", "curl", "--fail", "http://localhost:8080/health"]
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
restart: always
depends_on:
<<: *airflow-common-depends-on
airflow-init:
condition: service_completed_successfully
airflow-scheduler:
<<: *airflow-common
command: scheduler
healthcheck:
test: ["CMD", "curl", "--fail", "http://localhost:8974/health"]
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
restart: always
depends_on:
<<: *airflow-common-depends-on
airflow-init:
condition: service_completed_successfully
airflow-worker:
<<: *airflow-common
command: celery worker
healthcheck:
test:
- "CMD-SHELL"
- 'celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"'
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
environment:
<<: *airflow-common-env
# Required to handle warm shutdown of the celery workers properly
# See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation
DUMB_INIT_SETSID: "0"
restart: always
depends_on:
<<: *airflow-common-depends-on
airflow-init:
condition: service_completed_successfully
airflow-triggerer:
<<: *airflow-common
command: triggerer
healthcheck:
test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
restart: always
depends_on:
<<: *airflow-common-depends-on
airflow-init:
condition: service_completed_successfully
airflow-init:
<<: *airflow-common
entrypoint: /bin/bash
# yamllint disable rule:line-length
command:
- -c
- |
function ver() {
printf "%04d%04d%04d%04d" $${1//./ }
}
airflow_version=$$(AIRFLOW__LOGGING__LOGGING_LEVEL=INFO && gosu airflow airflow version)
airflow_version_comparable=$$(ver $${airflow_version})
min_airflow_version=2.2.0
min_airflow_version_comparable=$$(ver $${min_airflow_version})
if (( airflow_version_comparable < min_airflow_version_comparable )); then
echo
echo -e "\033[1;31mERROR!!!: Too old Airflow version $${airflow_version}!\e[0m"
echo "The minimum Airflow version supported: $${min_airflow_version}. Only use this or higher!"
echo
exit 1
fi
if [[ -z "${AIRFLOW_UID}" ]]; then
echo
echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m"
echo "If you are on Linux, you SHOULD follow the instructions below to set "
echo "AIRFLOW_UID environment variable, otherwise files will be owned by root."
echo "For other operating systems you can get rid of the warning with manually created .env file:"
echo " See: https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user"
echo
fi
one_meg=1048576
mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg))
cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat)
disk_available=$$(df / | tail -1 | awk '{print $$4}')
warning_resources="false"
if (( mem_available < 4000 )) ; then
echo
echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m"
echo "At least 4GB of memory required. You have $$(numfmt --to iec $$((mem_available * one_meg)))"
echo
warning_resources="true"
fi
if (( cpus_available < 2 )); then
echo
echo -e "\033[1;33mWARNING!!!: Not enough CPUS available for Docker.\e[0m"
echo "At least 2 CPUs recommended. You have $${cpus_available}"
echo
warning_resources="true"
fi
if (( disk_available < one_meg * 10 )); then
echo
echo -e "\033[1;33mWARNING!!!: Not enough Disk space available for Docker.\e[0m"
echo "At least 10 GBs recommended. You have $$(numfmt --to iec $$((disk_available * 1024 )))"
echo
warning_resources="true"
fi
if [[ $${warning_resources} == "true" ]]; then
echo
echo -e "\033[1;33mWARNING!!!: You have not enough resources to run Airflow (see above)!\e[0m"
echo "Please follow the instructions to increase amount of resources available:"
echo " https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#before-you-begin"
echo
fi
mkdir -p /sources/logs /sources/dags /sources/plugins
chown -R "${AIRFLOW_UID}:0" /sources/{logs,dags,plugins}
exec /entrypoint airflow version
# yamllint enable rule:line-length
environment:
<<: *airflow-common-env
_AIRFLOW_DB_UPGRADE: 'true'
_AIRFLOW_WWW_USER_CREATE: 'true'
_AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
_AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}
_PIP_ADDITIONAL_REQUIREMENTS: ''
user: "0:0"
volumes:
- ${AIRFLOW_PROJ_DIR:-.}:/sources
airflow-cli:
<<: *airflow-common
profiles:
- debug
environment:
<<: *airflow-common-env
CONNECTION_CHECK_MAX_COUNT: "0"
# Workaround for entrypoint issue. See: https://github.com/apache/airflow/issues/16252
command:
- bash
- -c
- airflow
# You can enable flower by adding "--profile flower" option e.g. docker-compose --profile flower up
# or by explicitly targeted on the command line e.g. docker-compose up flower.
# See: https://docs.docker.com/compose/profiles/
flower:
<<: *airflow-common
command: celery flower
profiles:
- flower
ports:
- "5555:5555"
healthcheck:
test: ["CMD", "curl", "--fail", "http://localhost:5555/"]
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
restart: always
depends_on:
<<: *airflow-common-depends-on
airflow-init:
condition: service_completed_successfully
volumes:
postgres-db-volume:

@ -1,332 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 16,
"id": "4817ec9b-c8e3-45a4-97dc-acee536e4d77",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import requests\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a05a0d5e-012e-46b8-a52c-7f8a3a412e62",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"nac4_finals_draft_id='FMHZx'"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "16438549-78b6-4865-ba7d-fb90486579e3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def download_draft_json(draft_id):\n",
" url = \"https://aoe2cm.net/api/draft/{}\".format(draft_id)\n",
"\n",
" payload={}\n",
" headers = {}\n",
"\n",
" response = requests.request(\"GET\", url, headers=headers, data=payload)\n",
"\n",
" # print(response.text)\n",
" draft_json_string = response.text\n",
" draft_dict = json.loads(draft_json_string)\n",
" return draft_dict"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "527953cb-e30f-4599-a5e6-b5f8ce3b678d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# draft_dict['events']"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "4f068845-8292-4b38-bd2b-0d89a2eea434",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"marcel_vs_canuck= 'lipeX'"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "8a375968-3d08-46e2-b7be-45a926de40f1",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"marcel_vs_canuck_json = download_draft_json(marcel_vs_canuck)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "69ecf8fd-b437-4632-9f6e-1172a34badf0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# marcel_vs_canuck_json"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "c45783ac-50c0-4cf2-b451-23d11b5f5079",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "a9f08fa9-4f90-4ba1-a639-78ec888ac86c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"draft_events = marcel_vs_canuck_json['events']"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "527935c9-1f49-4c7f-8b4a-b95475a66068",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"draft_events_df = pd.DataFrame.from_dict(draft_events)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "7e2bb0c3-ab79-4e95-94f7-52a590e8dfe6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"draft_picks = draft_events_df[draft_events_df['actionType'] == 'pick']"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "99dedcda-5664-492b-a076-7fe059948660",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>player</th>\n",
" <th>executingPlayer</th>\n",
" <th>actionType</th>\n",
" <th>chosenOptionId</th>\n",
" <th>isRandomlyChosen</th>\n",
" <th>offset</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>HOST</td>\n",
" <td>HOST</td>\n",
" <td>pick</td>\n",
" <td>Koala</td>\n",
" <td>False</td>\n",
" <td>13973</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>GUEST</td>\n",
" <td>GUEST</td>\n",
" <td>pick</td>\n",
" <td>Nordinseln</td>\n",
" <td>False</td>\n",
" <td>26925</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>HOST</td>\n",
" <td>HOST</td>\n",
" <td>pick</td>\n",
" <td>Vier Seen</td>\n",
" <td>False</td>\n",
" <td>54947</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" player executingPlayer actionType chosenOptionId isRandomlyChosen offset\n",
"2 HOST HOST pick Koala False 13973\n",
"3 GUEST GUEST pick Nordinseln False 26925\n",
"8 HOST HOST pick Vier Seen False 54947"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"draft_picks"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "17a9c895-c486-4b53-b34d-917e6e94d0fc",
"metadata": {},
"outputs": [],
"source": [
"# the last pick is the neutral map\n",
"def mark_neutral_map(draft_picks):\n",
" draft_picks.\n",
" draft_picks['neutral_map'] = "
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "1a82cae5-dd3d-4799-baa1-be30f550d19b",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>player</th>\n",
" <th>executingPlayer</th>\n",
" <th>actionType</th>\n",
" <th>chosenOptionId</th>\n",
" <th>isRandomlyChosen</th>\n",
" <th>offset</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>HOST</td>\n",
" <td>HOST</td>\n",
" <td>pick</td>\n",
" <td>Vier Seen</td>\n",
" <td>False</td>\n",
" <td>54947</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" player executingPlayer actionType chosenOptionId isRandomlyChosen offset\n",
"8 HOST HOST pick Vier Seen False 54947"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"draft_picks.tail(1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

35
jupyter/README.md Normal file

@ -0,0 +1,35 @@
# aoe2-data
## Jupyter notebook in docker:
There are different images to choose from: https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html
docker run -it -p 8888:8888 jupyter/scipy-notebook:2023-03-27
This works only on WSL because of the mounted host volume.
run detached:
```bash
docker run --name jupyter -d --rm \
-p 8888:8888 \
--user root \
-e NB_USER="marcelo" \
-e CHOWN_HOME=yes \
-w "/home/marcelo" \
-v /mnt/c/Users/dechert/git/aoe2-data/jupyter-data:/home/marcelo/src \
jupyter/scipy-notebook:2023-03-27
```
`docker logs -f jupyter` shows the notebook URL including its access token.
configuration:
https://jupyter-docker-stacks.readthedocs.io/en/latest/using/common.html
How to install new packages:
https://jupyter-docker-stacks.readthedocs.io/en/latest/using/common.html#conda-environments
install a package inside the running container:
docker exec -it jupyter bash
pip install liquipediapy
copy the notebook out of the container:
docker cp jupyter:/home/marcelo/AoE-data.ipynb .

@ -0,0 +1,140 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "53445797-52c4-4443-8095-889cf1c24298",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%load_ext autoreload\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4de950c8-cabe-4874-aa78-548342993b05",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%autoreload 1"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a121cdfe-114a-4aae-b107-df891338dfcc",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import drafts"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a05a0d5e-012e-46b8-a52c-7f8a3a412e62",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"nac4_finals_draft_id='FMHZx'"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "4f068845-8292-4b38-bd2b-0d89a2eea434",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"marcel_vs_canuck= 'lipeX'"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "8a375968-3d08-46e2-b7be-45a926de40f1",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"marcel_vs_canuck_json = drafts.download_draft_json(marcel_vs_canuck)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "fa4e0d16-e397-4ff5-8a70-87e6e6cacba9",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"draft_picks=drafts.get_picked_maps(marcel_vs_canuck_json)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a668fe2a-9006-4987-86bc-2179ebdfd0a2",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'Vier Seen'"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"drafts.neutral_map_as_string(draft_picks)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b337190-7887-40b7-9d10-b8039f5f2dfd",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,27 @@
import requests
import json

import pandas as pd


def download_draft_json(draft_id):
    url = "https://aoe2cm.net/api/draft/{}".format(draft_id)
    response = requests.get(url)
    # The endpoint returns the full draft as a JSON document; parse it into a dict.
    draft_dict = json.loads(response.text)
    return draft_dict


def get_picked_maps(draft_json):
    draft_events = draft_json['events']
    draft_events_df = pd.DataFrame.from_dict(draft_events)
    # Keep only the "pick" events; everything else (bans etc.) is dropped.
    draft_picks = draft_events_df[draft_events_df['actionType'] == 'pick']
    return draft_picks


# the last pick is the neutral map
def neutral_map_as_string(draft_picks):
    # Select by position rather than by the original row label, so this also
    # works for drafts where the last pick does not happen to sit at index 8.
    return str(draft_picks['chosenOptionId'].iloc[-1])