Files
aoe2-data/jupyter/src/folder-compare.ipynb
dechert 6de13bec2b update
2026-02-26 13:59:53 +01:00

894 lines
29 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "53445797-52c4-4443-8095-889cf1c24298",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%load_ext autoreload\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4de950c8-cabe-4874-aa78-548342993b05",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%autoreload 1"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "bccbd4ee-e6e7-4bc5-b72c-98196e811766",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "7b337190-7887-40b7-9d10-b8039f5f2dfd",
"metadata": {},
"outputs": [],
"source": [
"project_df = pd.read_csv('encore-files/project.csv')\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "44729805-deb3-4c61-b019-c9f52c5898f0",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['id', 'address', 'building_type', 'city', 'country', 'customer_name',\n",
" 'description', 'name', 'no_of_habitants', 'postal_code', 'projectuuid',\n",
" 'state', 'year_of_construction', 'bms_information_id',\n",
" 'project_coordinator_id', 'project_tag_id', 'ifc_before_tagid',\n",
" 'ifc_to_be_tagid', 'ifc_enhanced_tagid'],\n",
" dtype='object')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"project_df.columns"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "561aa71c-c506-47b7-8b5f-6c00104a8e91",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"only_ifc_ids=project_df[['project_tag_id', 'c',\n",
" 'ifc_to_be_tagid', 'ifc_enhanced_tagid']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0af0d724-4dc5-434d-8209-e6604330c4e9",
"metadata": {},
"outputs": [],
"source": [
"only_ifc_ids"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "d1fe283a-35cd-4bf6-80ef-bd1a68ac2961",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def merge_cols(df):\n",
" df[\"all_ids\"] = df[\"project_tag_id\"] + df[\"project_tag_id\"] + df[\"ifc_to_be_tagid\"] + df[\"ifc_enhanced_tagid\"]\n",
" return df[[\"all_ids\"]]"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "bfdca4ba-edf0-4394-b20d-8ab04130a30f",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_98/3701186255.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df[\"all_ids\"] = df[\"project_tag_id\"] + df[\"project_tag_id\"] + df[\"ifc_to_be_tagid\"] + df[\"ifc_enhanced_tagid\"]\n"
]
}
],
"source": [
"ids_from_database = merge_cols(only_ifc_ids)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "b8aee40a-7cc9-41b0-a1ef-4c3ed293f229",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>all_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>97a3e117-a4b9-463f-81ed-8a0fd6826b1297a3e117-a...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>b27ac00c-0b6f-4909-81db-ef8c136c1c28b27ac00c-0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>d48ed765-b997-4998-b48f-d5c6f37764a2d48ed765-b...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>3fe6a2a0-dcf6-4108-b079-085e7817b9333fe6a2a0-d...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>4036063b-84bf-4312-883d-306648307e814036063b-8...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>6e36b3b6-1b35-46a0-89b4-c64b2dcfc4d26e36b3b6-1...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>6c886d15-2ddf-489e-8440-066ad5c0c0516c886d15-2...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>6ba02956-f25b-4a68-8c02-0d6308b4460f6ba02956-f...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>cc1e623d-ba33-4f84-8bfb-7d19d062cc20cc1e623d-b...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>fe04ff9b-c1ff-41c4-9377-b7df1cfe2d22fe04ff9b-c...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>aa0efa31-7f46-4511-bf25-1a14f724d43baa0efa31-7...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>5d768096-e9e0-4b2d-bd3b-5e48349fd4ca5d768096-e...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>bdd29bf3-c04a-4dd2-85b8-28195d167dffbdd29bf3-c...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>67001609-3c2d-4963-9838-b211b0f5010667001609-3...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>9747c628-6b97-4b21-a9b2-7a249afedafb9747c628-6...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>02cafcb1-98aa-4a7c-bf1e-ddf55a9d5fbe02cafcb1-9...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>ebff9dd8-e032-4bed-ab15-67279baf9604ebff9dd8-e...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>2aa3427a-5388-4bf7-8076-681f1b8ad5d42aa3427a-5...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>246a4471-9477-4adf-9ec5-42ad325733a6246a4471-9...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>337f2a2e-2826-4bc8-83c2-8c0459781c1d337f2a2e-2...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>7ac6f1a5-9ecb-4ab2-9fc1-7abad928544e7ac6f1a5-9...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>a258185a-f60c-452e-82e3-572a544f22cca258185a-f...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>e7bde166-4910-407b-9b1d-1943fbcd6da4e7bde166-4...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>5660466b-2600-4b9e-bbce-faf192cfc9065660466b-2...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>c4cd356d-9d70-4952-8f18-39b3807c81dfc4cd356d-9...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>1fc260f6-4713-4b06-a680-38614db8d9561fc260f6-4...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>432c91b2-ff25-469f-a623-3f39d5ed67d2432c91b2-f...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>7397b503-33ab-47ef-acbe-09edb2176ff37397b503-3...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>8269294e-a9b0-483c-8abe-25dbda0656cf8269294e-a...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>f2b10aa9-536d-4968-ba7b-eaa98e717f43f2b10aa9-5...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>ed5db4a1-5560-4753-9c6b-4e4dc3168ddfed5db4a1-5...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>aa252dd8-4d8e-45ec-ab5e-a531085c2459aa252dd8-4...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>6aff4eb5-c6f3-4a3a-8052-0e9d2525969d6aff4eb5-c...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>e4909d99-cf20-4334-835b-598c66ee2ce1e4909d99-c...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" all_ids\n",
"0 97a3e117-a4b9-463f-81ed-8a0fd6826b1297a3e117-a...\n",
"1 NaN\n",
"2 NaN\n",
"3 NaN\n",
"4 b27ac00c-0b6f-4909-81db-ef8c136c1c28b27ac00c-0...\n",
"5 NaN\n",
"6 d48ed765-b997-4998-b48f-d5c6f37764a2d48ed765-b...\n",
"7 NaN\n",
"8 3fe6a2a0-dcf6-4108-b079-085e7817b9333fe6a2a0-d...\n",
"9 4036063b-84bf-4312-883d-306648307e814036063b-8...\n",
"10 6e36b3b6-1b35-46a0-89b4-c64b2dcfc4d26e36b3b6-1...\n",
"11 NaN\n",
"12 6c886d15-2ddf-489e-8440-066ad5c0c0516c886d15-2...\n",
"13 6ba02956-f25b-4a68-8c02-0d6308b4460f6ba02956-f...\n",
"14 cc1e623d-ba33-4f84-8bfb-7d19d062cc20cc1e623d-b...\n",
"15 fe04ff9b-c1ff-41c4-9377-b7df1cfe2d22fe04ff9b-c...\n",
"16 aa0efa31-7f46-4511-bf25-1a14f724d43baa0efa31-7...\n",
"17 NaN\n",
"18 NaN\n",
"19 5d768096-e9e0-4b2d-bd3b-5e48349fd4ca5d768096-e...\n",
"20 bdd29bf3-c04a-4dd2-85b8-28195d167dffbdd29bf3-c...\n",
"21 67001609-3c2d-4963-9838-b211b0f5010667001609-3...\n",
"22 9747c628-6b97-4b21-a9b2-7a249afedafb9747c628-6...\n",
"23 02cafcb1-98aa-4a7c-bf1e-ddf55a9d5fbe02cafcb1-9...\n",
"24 ebff9dd8-e032-4bed-ab15-67279baf9604ebff9dd8-e...\n",
"25 2aa3427a-5388-4bf7-8076-681f1b8ad5d42aa3427a-5...\n",
"26 246a4471-9477-4adf-9ec5-42ad325733a6246a4471-9...\n",
"27 337f2a2e-2826-4bc8-83c2-8c0459781c1d337f2a2e-2...\n",
"28 7ac6f1a5-9ecb-4ab2-9fc1-7abad928544e7ac6f1a5-9...\n",
"29 a258185a-f60c-452e-82e3-572a544f22cca258185a-f...\n",
"30 e7bde166-4910-407b-9b1d-1943fbcd6da4e7bde166-4...\n",
"31 5660466b-2600-4b9e-bbce-faf192cfc9065660466b-2...\n",
"32 c4cd356d-9d70-4952-8f18-39b3807c81dfc4cd356d-9...\n",
"33 1fc260f6-4713-4b06-a680-38614db8d9561fc260f6-4...\n",
"34 432c91b2-ff25-469f-a623-3f39d5ed67d2432c91b2-f...\n",
"35 7397b503-33ab-47ef-acbe-09edb2176ff37397b503-3...\n",
"36 8269294e-a9b0-483c-8abe-25dbda0656cf8269294e-a...\n",
"37 f2b10aa9-536d-4968-ba7b-eaa98e717f43f2b10aa9-5...\n",
"38 ed5db4a1-5560-4753-9c6b-4e4dc3168ddfed5db4a1-5...\n",
"39 aa252dd8-4d8e-45ec-ab5e-a531085c2459aa252dd8-4...\n",
"40 6aff4eb5-c6f3-4a3a-8052-0e9d2525969d6aff4eb5-c...\n",
"41 e4909d99-cf20-4334-835b-598c66ee2ce1e4909d99-c..."
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ids_from_database"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "f52b6cc0-e0ea-479b-a24e-d4bcfa769bd2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"converted_files_df = pd.read_csv('encore-files/filenames-only.csv')\n"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "da181f14-a495-4950-b4f4-cba92b099184",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>foldername</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>238590</th>\n",
" <td>fffb5e51-74ec-4829-8074-fc095346c8a6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238591</th>\n",
" <td>fffc44dd-2c8d-4d71-b08e-e192e7ad9b37</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238592</th>\n",
" <td>fffc79c6-5f47-4641-87c6-57cc28478f73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238593</th>\n",
" <td>fffcc424-c740-46da-b57c-cc97be249524</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238594</th>\n",
" <td>fffcda08-0da5-4c2a-ab94-fd49477d18dc</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238595</th>\n",
" <td>fffd27c4-ca0b-4067-96d8-848f50a43224</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238596</th>\n",
" <td>fffdab41-f136-4eaa-b870-68aff14e0763</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238597</th>\n",
" <td>fffdedf2-3063-4da8-b7da-cfd7eb76cf90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238598</th>\n",
" <td>fffdf3d8-7069-4c19-90b5-d96d7459b54d</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238599</th>\n",
" <td>fffe0eff-dd7e-4dbb-9f71-29401c553e25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238600</th>\n",
" <td>ffff29a7-ae72-4620-9abf-e3919bd23fe5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238601</th>\n",
" <td>ffff502d-7393-4425-898d-21fefd5bc0e3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238602</th>\n",
" <td>ffff5b42-575a-48c0-8093-61db051f903b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238603</th>\n",
" <td>ffff83bb-bbc8-43c1-a219-e35a5b2e28bd</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238604</th>\n",
" <td>ffffc0e5-0a22-41b1-9d50-c532e2c313aa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238605</th>\n",
" <td>filenames-only.txt</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238606</th>\n",
" <td>filenames.txt</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238607</th>\n",
" <td>files.txt</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238608</th>\n",
" <td>files2.txt</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238609</th>\n",
" <td>index.json</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" foldername\n",
"238590 fffb5e51-74ec-4829-8074-fc095346c8a6\n",
"238591 fffc44dd-2c8d-4d71-b08e-e192e7ad9b37\n",
"238592 fffc79c6-5f47-4641-87c6-57cc28478f73\n",
"238593 fffcc424-c740-46da-b57c-cc97be249524\n",
"238594 fffcda08-0da5-4c2a-ab94-fd49477d18dc\n",
"238595 fffd27c4-ca0b-4067-96d8-848f50a43224\n",
"238596 fffdab41-f136-4eaa-b870-68aff14e0763\n",
"238597 fffdedf2-3063-4da8-b7da-cfd7eb76cf90\n",
"238598 fffdf3d8-7069-4c19-90b5-d96d7459b54d\n",
"238599 fffe0eff-dd7e-4dbb-9f71-29401c553e25\n",
"238600 ffff29a7-ae72-4620-9abf-e3919bd23fe5\n",
"238601 ffff502d-7393-4425-898d-21fefd5bc0e3\n",
"238602 ffff5b42-575a-48c0-8093-61db051f903b\n",
"238603 ffff83bb-bbc8-43c1-a219-e35a5b2e28bd\n",
"238604 ffffc0e5-0a22-41b1-9d50-c532e2c313aa\n",
"238605 filenames-only.txt\n",
"238606 filenames.txt\n",
"238607 files.txt\n",
"238608 files2.txt\n",
"238609 index.json"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"converted_files_df.tail(20)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fd2c24b7-ddb1-4d77-9a38-7e11599e91c5",
"metadata": {},
"outputs": [],
"source": [
"converted_files_df['foldername']"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "2fe47979-1a5b-41b4-9b11-5cb4616e6fd8",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def get_diff_for_two_dataframes(df1, df2):\n",
" \"\"\"Returns set of values unique to df1\n",
" \"\"\"\n",
" diff = df1[df1['all_ids'].isin(df2['foldername']) == False]\n",
" return diff"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "5f5a9b4e-91e0-4cbf-8f31-e5cda02d88dc",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>all_ids</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>97a3e117-a4b9-463f-81ed-8a0fd6826b1297a3e117-a...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>b27ac00c-0b6f-4909-81db-ef8c136c1c28b27ac00c-0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>d48ed765-b997-4998-b48f-d5c6f37764a2d48ed765-b...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>3fe6a2a0-dcf6-4108-b079-085e7817b9333fe6a2a0-d...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>4036063b-84bf-4312-883d-306648307e814036063b-8...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>6e36b3b6-1b35-46a0-89b4-c64b2dcfc4d26e36b3b6-1...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>6c886d15-2ddf-489e-8440-066ad5c0c0516c886d15-2...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>6ba02956-f25b-4a68-8c02-0d6308b4460f6ba02956-f...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>cc1e623d-ba33-4f84-8bfb-7d19d062cc20cc1e623d-b...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>fe04ff9b-c1ff-41c4-9377-b7df1cfe2d22fe04ff9b-c...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>aa0efa31-7f46-4511-bf25-1a14f724d43baa0efa31-7...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>5d768096-e9e0-4b2d-bd3b-5e48349fd4ca5d768096-e...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>bdd29bf3-c04a-4dd2-85b8-28195d167dffbdd29bf3-c...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>67001609-3c2d-4963-9838-b211b0f5010667001609-3...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>9747c628-6b97-4b21-a9b2-7a249afedafb9747c628-6...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>02cafcb1-98aa-4a7c-bf1e-ddf55a9d5fbe02cafcb1-9...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>ebff9dd8-e032-4bed-ab15-67279baf9604ebff9dd8-e...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>2aa3427a-5388-4bf7-8076-681f1b8ad5d42aa3427a-5...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>246a4471-9477-4adf-9ec5-42ad325733a6246a4471-9...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>337f2a2e-2826-4bc8-83c2-8c0459781c1d337f2a2e-2...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>7ac6f1a5-9ecb-4ab2-9fc1-7abad928544e7ac6f1a5-9...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>a258185a-f60c-452e-82e3-572a544f22cca258185a-f...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>e7bde166-4910-407b-9b1d-1943fbcd6da4e7bde166-4...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>5660466b-2600-4b9e-bbce-faf192cfc9065660466b-2...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>c4cd356d-9d70-4952-8f18-39b3807c81dfc4cd356d-9...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>1fc260f6-4713-4b06-a680-38614db8d9561fc260f6-4...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>432c91b2-ff25-469f-a623-3f39d5ed67d2432c91b2-f...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>7397b503-33ab-47ef-acbe-09edb2176ff37397b503-3...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>8269294e-a9b0-483c-8abe-25dbda0656cf8269294e-a...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>f2b10aa9-536d-4968-ba7b-eaa98e717f43f2b10aa9-5...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>ed5db4a1-5560-4753-9c6b-4e4dc3168ddfed5db4a1-5...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>aa252dd8-4d8e-45ec-ab5e-a531085c2459aa252dd8-4...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>6aff4eb5-c6f3-4a3a-8052-0e9d2525969d6aff4eb5-c...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>e4909d99-cf20-4334-835b-598c66ee2ce1e4909d99-c...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" all_ids\n",
"0 97a3e117-a4b9-463f-81ed-8a0fd6826b1297a3e117-a...\n",
"1 NaN\n",
"2 NaN\n",
"3 NaN\n",
"4 b27ac00c-0b6f-4909-81db-ef8c136c1c28b27ac00c-0...\n",
"5 NaN\n",
"6 d48ed765-b997-4998-b48f-d5c6f37764a2d48ed765-b...\n",
"7 NaN\n",
"8 3fe6a2a0-dcf6-4108-b079-085e7817b9333fe6a2a0-d...\n",
"9 4036063b-84bf-4312-883d-306648307e814036063b-8...\n",
"10 6e36b3b6-1b35-46a0-89b4-c64b2dcfc4d26e36b3b6-1...\n",
"11 NaN\n",
"12 6c886d15-2ddf-489e-8440-066ad5c0c0516c886d15-2...\n",
"13 6ba02956-f25b-4a68-8c02-0d6308b4460f6ba02956-f...\n",
"14 cc1e623d-ba33-4f84-8bfb-7d19d062cc20cc1e623d-b...\n",
"15 fe04ff9b-c1ff-41c4-9377-b7df1cfe2d22fe04ff9b-c...\n",
"16 aa0efa31-7f46-4511-bf25-1a14f724d43baa0efa31-7...\n",
"17 NaN\n",
"18 NaN\n",
"19 5d768096-e9e0-4b2d-bd3b-5e48349fd4ca5d768096-e...\n",
"20 bdd29bf3-c04a-4dd2-85b8-28195d167dffbdd29bf3-c...\n",
"21 67001609-3c2d-4963-9838-b211b0f5010667001609-3...\n",
"22 9747c628-6b97-4b21-a9b2-7a249afedafb9747c628-6...\n",
"23 02cafcb1-98aa-4a7c-bf1e-ddf55a9d5fbe02cafcb1-9...\n",
"24 ebff9dd8-e032-4bed-ab15-67279baf9604ebff9dd8-e...\n",
"25 2aa3427a-5388-4bf7-8076-681f1b8ad5d42aa3427a-5...\n",
"26 246a4471-9477-4adf-9ec5-42ad325733a6246a4471-9...\n",
"27 337f2a2e-2826-4bc8-83c2-8c0459781c1d337f2a2e-2...\n",
"28 7ac6f1a5-9ecb-4ab2-9fc1-7abad928544e7ac6f1a5-9...\n",
"29 a258185a-f60c-452e-82e3-572a544f22cca258185a-f...\n",
"30 e7bde166-4910-407b-9b1d-1943fbcd6da4e7bde166-4...\n",
"31 5660466b-2600-4b9e-bbce-faf192cfc9065660466b-2...\n",
"32 c4cd356d-9d70-4952-8f18-39b3807c81dfc4cd356d-9...\n",
"33 1fc260f6-4713-4b06-a680-38614db8d9561fc260f6-4...\n",
"34 432c91b2-ff25-469f-a623-3f39d5ed67d2432c91b2-f...\n",
"35 7397b503-33ab-47ef-acbe-09edb2176ff37397b503-3...\n",
"36 8269294e-a9b0-483c-8abe-25dbda0656cf8269294e-a...\n",
"37 f2b10aa9-536d-4968-ba7b-eaa98e717f43f2b10aa9-5...\n",
"38 ed5db4a1-5560-4753-9c6b-4e4dc3168ddfed5db4a1-5...\n",
"39 aa252dd8-4d8e-45ec-ab5e-a531085c2459aa252dd8-4...\n",
"40 6aff4eb5-c6f3-4a3a-8052-0e9d2525969d6aff4eb5-c...\n",
"41 e4909d99-cf20-4334-835b-598c66ee2ce1e4909d99-c..."
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"get_diff_for_two_dataframes_by_single_col(ids_from_database, converted_files_df)"
]
},
{
"cell_type": "markdown",
"id": "ee2080ff-3a50-4170-894e-c617509ac578",
"metadata": {},
"source": [
" - length of array is the same, so no ifc files from the database are included there\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0af46bf5-95f2-4fbf-af03-e2ba0f94c1f9",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "47cd4a1f-96b9-4cdd-9d50-9fcb9a1000e2",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}