Compare commits
2 Commits
d1d7d1b58d
...
2bf3ab4410
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2bf3ab4410 | ||
|
|
6de13bec2b |
@@ -30,7 +30,7 @@ PATH_TO_JUPYTER_FOLDER="/mnt/c/Users/$(echo whoami)/git/aoe2-data/jupyter/src" d
|
|||||||
jupyter/scipy-notebook:2023-03-27
|
jupyter/scipy-notebook:2023-03-27
|
||||||
```
|
```
|
||||||
|
|
||||||
|
-v /mnt/c/Users/dechert/git/aoe2-data/jupyter/:/home/marcelo/ \
|
||||||
`docker logs -f jupyter` to see the url with secret
|
`docker logs -f jupyter` to see the url with secret
|
||||||
|
|
||||||
configuration:
|
configuration:
|
||||||
|
|||||||
238611
jupyter/src/encore-files/filenames-only.csv
Normal file
238611
jupyter/src/encore-files/filenames-only.csv
Normal file
File diff suppressed because it is too large
Load Diff
43
jupyter/src/encore-files/project.csv
Normal file
43
jupyter/src/encore-files/project.csv
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
id,address,building_type,city,country,customer_name,description,name,no_of_habitants,postal_code,projectuuid,state,year_of_construction,bms_information_id,project_coordinator_id,project_tag_id,ifc_before_tagid,ifc_to_be_tagid,ifc_enhanced_tagid
|
||||||
|
10,Rainbow Road 2,"",Albons,Spain,Norena,"Added by Encore Admin, please replace with project description",Building02_EDEA,0,"Added by Encore Admin, please replace with postal code",08d3053f-7688-4448-8d90-960800825c51,Catalonia,0,1,8,97a3e117-a4b9-463f-81ed-8a0fd6826b12,f28b28ff-60e4-4492-8b64-33cbfab13f53,fed95db4-26e6-40cb-83b4-d0d3d6923a18,b041ef00-1182-414b-86c2-2f7a55584c36
|
||||||
|
11,Rainbow Road 2,"",Albons,Spain,Norena,Building02_EDEA,Building02_EDEA_2,0,"Added by Encore Admin, please replace with postal code",b72c2cda-17e0-48c1-aa50-e10ed9209555,Catalonia,0,,8,,,,
|
||||||
|
12,"San Juan, 2",Residential,Albons,Spain,Norena,"Added by Encore Admin, please replace with project description",EDEA01,0,"Added by Encore Admin, please replace with postal code",fcedb584-8e61-42af-a9e8-50ff96dd3ca0,Catalonia,2000,,8,,,,
|
||||||
|
13,Musterstrasse,private,Bremen,Germany,JEA,description,JEA Test Projekt,4,12345,bb394dc0-8d30-463d-8ed7-a48241484d87,Bremen,1999,,16,,,,
|
||||||
|
14,unknow address,"",Abenberg,Germany,Ruben Navarro,Test description,Test BREA,10,"Added by Encore Admin, please replace with postal code",dd0d07dd-6593-4a13-9cd7-b3c78494c2e1,Bavaria,1980,,15,b27ac00c-0b6f-4909-81db-ef8c136c1c28,8fd5459f-49d1-45f5-a265-93c65f113e14,a3279bf7-e99c-419d-aeeb-d543336a7a52,5887be41-0fea-478a-b1b7-d7ac7e73579a
|
||||||
|
16,xxxxxxxxxxxxxxxxxxxxxxx,"",Altino,Italy,xxxxxxx,"Added by Encore Admin, please replace with project description",Case Study 7 : Belletti 2.0,0,60126,bccbb9f7-61d8-44a7-88e0-bd9d3f8c4578,Abruzzo,2021,,17,,,,
|
||||||
|
19,TEST2123123,"",Island Harbour,Anguilla,TEst name,This is a test description,Test Project,0,2132,41c48a2a-6097-4af7-a1b6-8eb46b1026c5,Island Harbour,0,19,11,d48ed765-b997-4998-b48f-d5c6f37764a2,b0b31674-1f3e-4b9d-aa02-a0d80851c242,800ccc5b-51da-4919-9de2-1c57a463eaef,8cbba3d3-bc16-404f-9873-9d15aa8ec366
|
||||||
|
34,Calle Herreros Nº 2,Residential,Caceres,Spain,EDEA-CICE Centro de Innovación y Calidad de la Edificación,Increase energy efficiency of demonstrator building and validate ENCORE solution in this pilot case,Encore Pilot Case,2,10005,2e9c538e-c508-4f2e-b352-7a388c327e36,Extremadura,1990,14,4,,,,
|
||||||
|
38,"Calle Herreros, 5",Residential,Caceres,Spain,Norena Martín-Dorta,Test Cáceres 20220218,20220218_Test01,4,10005,e3362336-f3d6-4b31-9664-06ed0c47cecc,Extremadura,2000,,8,3fe6a2a0-dcf6-4108-b079-085e7817b933,917bdf0b-fa09-46fe-b24b-2179ee9982f0,9c34f225-88f5-407d-850c-eb4a59c8d917,63a0ce94-2b5e-44be-b2e0-50a3ba3c6317
|
||||||
|
39,"PLAZA HONDURAS, 9 P.1","",Valencia,Spain,"",Test of the integration between ENCORE and BREA,Test Integration ENCORE BREA,0,46022,31eaaa84-c317-425b-b61c-4563d104e19b,Valencia,0,,28,4036063b-84bf-4312-883d-306648307e81,008f6b8a-70a3-460c-a3bb-9d8e2c39d208,827149ae-d6fc-49e6-a5a4-a37ddf602070,58eea86b-2c17-40d5-812a-62a4b1a2076b
|
||||||
|
40,"Plaza Honduras, 9","",Valencia,Spain,"",Test ENCORE 1,Test ENCORE 1,0,46022,00f62708-f43f-48a6-b3cd-029f234e5b49,Valencia,0,,28,6e36b3b6-1b35-46a0-89b4-c64b2dcfc4d2,8615354f-c878-4d09-94ce-a4f1fdd36096,5505ceb9-a5cb-4e07-bc25-a94c9fec5f53,9a1efeb9-b98d-4e94-9040-2156e63bb3ff
|
||||||
|
41,Diogenous 1,Residential,Tseri,Cyprus,RTD TALOS LTD,lorem ispum dolor sit amet,Test Cy project,4,2404,de3c0055-993f-4592-bf0f-78bfda046e4e,Nicosia,2021,16,29,,,,
|
||||||
|
45,xxxxddddddddd,"",Caceres,Spain,xxx,xxxx,Caceres Renovation UNIVPM,0,xxx,486ae426-4c1b-45fb-bc31-8a6a177e2d58,Caceres,0,,17,6c886d15-2ddf-489e-8440-066ad5c0c051,2c4999d3-e3d2-4f43-931d-6f0e19c37204,ce3831d3-4079-4bf8-8802-47e4eb748005,5d18c46b-d571-4a71-bd81-7289cb3e962a
|
||||||
|
46,Vorkampsweg 166A,"",Caceres,Spain,"",ODAVS validation in Caceres,Caceres Renovation EDEA,0,28359,32955c3a-77ff-4063-a010-1c6db98ee497,Caceres,0,,16,6ba02956-f25b-4a68-8c02-0d6308b4460f,176d585b-0b2a-4159-bf53-fc3faf72a9f7,35fc1934-259a-40ef-aff6-91fcceffb5ab,2d9fd19d-cf60-408c-ba4d-b7ed874e55cc
|
||||||
|
49,"via Brecce Bianche, Ancona",university,Ancona,Italy,UNIVPM,demo project with univpm model,UNIVPM Demo,0,60100,b1a82a82-d618-41c1-a3c4-fdccb1f30bfa,Ancona,1970,38,17,cc1e623d-ba33-4f84-8bfb-7d19d062cc20,f3b2a7db-0417-4246-a9ed-1da4572abb21,192f3fb6-a9c1-4ce4-87f4-7be594fd3c33,9e5d95a6-c4f0-4789-af90-5e239e0deb69
|
||||||
|
50,Calle Herreros Nº 2,Residential,Caceres,Spain,EDEA-CICE Centro de Innovación y Calidad de la Edificación,Increase energy efficiency of demonstrator building and validate ENCORE solution in this pilot case,Encore Pilot Case_2,2,10005,40d8e4d7-261d-4f83-af17-26e88f43a410,Caceres,1990,20,26,fe04ff9b-c1ff-41c4-9377-b7df1cfe2d22,cfca8119-5cbe-479f-befa-1d05d8c8e655,ffa2af9c-d7f2-4123-aded-f973464ac0a2,a61767be-da0b-45bf-8b5a-0e2ff42292eb
|
||||||
|
51,Via Brecce Bianche 12,tipo,Ancona,Italy,Mario Rossi,Descrizione progetto,Progetto di prova,10,60131,8e46e249-2da6-42f2-8d7f-6396b25be952,Ancona,20000,,34,aa0efa31-7f46-4511-bf25-1a14f724d43b,01f6dcd2-880f-43cb-801b-457bd5e12823,510de694-13d7-48f8-a82f-23bacd667e4d,cf057fda-a670-404a-a7fc-5df39c42f993
|
||||||
|
52,Via Fezzan 39,Residential,Roma,Italy,"",progetto di prova,Progetto di prova_2,30,00199,223aa6ed-743f-46f4-b275-b7c034fbad31,Roma,2010,,36,,,,
|
||||||
|
54,"via Brecce Bianche, Ancona",ita,Ancona,Italy,"",IFC example from https://github.com/buildingSMART/Sample-Test-Files,Dental Clinic,0,60100,22ec0887-d823-4d4b-9f4a-96ed966cd0ab,Marche,0,,17,,,,
|
||||||
|
55,"via Brecce Bianche, Ancona","",Ancona,Italy,"",Test the AWOPS platform,Test Francesco,0,60100,71404615-92f7-4c6d-a713-8ff4a75766db,Marche,0,,17,5d768096-e9e0-4b2d-bd3b-5e48349fd4ca,89657ca7-3302-45ac-b321-16d5ec412ba4,bde2e19b-d7c6-4908-b891-2243cbdb53a2,58bb0db7-19ea-4c5a-af2e-4cc19082e0be
|
||||||
|
63,Linyer Strasse 5,"",Baden,France,"",Windows should be replaced bz modern 3ßlazer windows,Office Renovation,0,28359,d54f03d6-057d-4b86-b7ec-38a047dd1ba5,Bretagne,0,25,18,bdd29bf3-c04a-4dd2-85b8-28195d167dff,ddbef2cc-51f9-4ab7-b7f5-b6f19ad77d39,dba20ad2-873d-423a-84b6-386b054696ac,d1abe974-67c9-4840-9a59-82cecd9ebe0c
|
||||||
|
64,Linyer Strasse 5,"",Tbilisi,Georgia,Windows should be replaced bz modern 3ßlazer windows,Windows should be replaced bz modern 3ßlazer windows,Office Renovation1,0,28359,62b727bb-94f8-42e0-be8d-08aac82cfd50,Tbilisi,0,,18,67001609-3c2d-4963-9838-b211b0f50106,ce3e9cb8-88eb-4a0e-9eca-1e452cbd3390,79486f35-6e37-4650-91ca-df6bb2e30c64,ec43ba82-8183-4d30-be9d-339936b24ee8
|
||||||
|
65,Linyer Strasse 5,"",Alburquerque,Spain,"",qweqwe,Renovation caceres,1,28359,abd1da67-0185-4838-aedf-ba4e34537233,Extremadura,0,,6,9747c628-6b97-4b21-a9b2-7a249afedafb,3fca1bfd-eebe-44a9-ad10-23a8b89dc36b,386b5858-460b-4a67-962a-aca88ee4ae93,c436e9c3-e960-4444-9b9e-a59d8537e458
|
||||||
|
66,Street,"",Lumeje,Angola,"",This is a test,ENCORETEST,0,1232,4156f8a2-f32c-495f-9747-58fbbbe7daa0,Moxico Province,0,,11,02cafcb1-98aa-4a7c-bf1e-ddf55a9d5fbe,49e3b841-69c2-4063-9678-b550f0ee3dbb,8fe4ba5a-a1fc-4864-ae65-e6b4401dccca,e7cb3ed2-5cc1-4fd2-ae9f-62e224bc621a
|
||||||
|
67,Some Street,"",Cachorrilla,Spain,EDEA,This is a test,Renovation EDEA candidate test,12,21212,133c0ca5-f390-4532-9514-403912fc254d,Extremadura,1999,,14,ebff9dd8-e032-4bed-ab15-67279baf9604,b61a156c-8eb6-4bf7-a6da-8b10ce2ad19b,d147dbab-1018-407b-a7ad-48eb6d28b6f8,9c3cbfc5-3e96-4e78-ac99-f17737239871
|
||||||
|
68,Some other street,"",Encamp,Andorra,"",Office Windows are old and have bad insulation,Renovation UNIVPM candidate test,0,AD200,e724e930-059c-4541-99aa-9a2b7f94cad5,Encamp,0,,14,2aa3427a-5388-4bf7-8076-681f1b8ad5d4,e5da2f78-fada-4d0b-b4dd-e0915039d0d8,8138cb43-5f1f-4721-810c-8ae0fa8cfdac,a45e9d34-da1b-44d6-a87f-8b52b20b0634
|
||||||
|
69,Street,"",N’dalatando,Angola,"",This is a test,testCreateProject,0,1232,3d680b2a-ec24-4ecd-9614-b90cdb70d145,Cuanza Norte Province,0,,11,246a4471-9477-4adf-9ec5-42ad325733a6,405cc1d9-fc87-4b67-b250-d185a10fcbd9,285a4d97-69cc-4a6e-be9d-c1f4b264bb2e,d7a801d2-9671-475f-9cf3-62aeab851d05
|
||||||
|
70,linzer straße,"",Bremen,Germany,"",Windows should be replaced bz modern 3ßlazer windows,my project,0,28211,586ae34d-525b-49b8-956c-146155e4954c,Bremen,0,26,18,337f2a2e-2826-4bc8-83c2-8c0459781c1d,4e4a60e2-d300-4dee-a24c-0c894ec1d1e7,11e6826b-e444-40fe-82b4-e68babffdc12,570c6659-7d60-4f76-a819-4760d3efdbc9
|
||||||
|
71,schwachhauser ring 5,"",Bremen,Germany,"",Windows should be replaced bz modern 3ßlazer windows,Bremen Test,0,28359,b4f1964d-34ef-4950-82aa-efd1186539d4,Bremen,0,,18,7ac6f1a5-9ecb-4ab2-9fc1-7abad928544e,e56dc630-1b29-4431-bd69-a5f215fa9446,0086ea3d-b4a9-47c1-a065-04d03a07b4ca,3077e266-5492-447a-bad3-69c5c0004f63
|
||||||
|
72,Schwachhauser Ring 100,Residential,Bremen,Germany,Marko Mustermann,Wall insulation needs to be improved,sept14-final,5,28359,15e707ea-ace8-4921-b3a6-c7e6962e87bc,Bremen,2007,27,18,a258185a-f60c-452e-82e3-572a544f22cc,3efbbf7d-13f9-4f69-a727-6dd8954ae388,87e4bcf5-cce1-44ce-a1ee-b92f187e10df,6e51fc46-96b0-4bf9-a1b7-cf65aedf2784
|
||||||
|
73,Calle Herreros Nº 2,Residential,Cáceres,Spain,EDEA-CICE Centro de Innovación y Calidad de la Edificación,Increase energy efficiency of demonstrator building and validate ENCORE solution in this pilot case,ENCORE Pilot Building Renovation,5,10005,8791cca1-2eb9-4b8e-8f1b-5f72603411de,Extremadura,1990,39,11,e7bde166-4910-407b-9b1d-1943fbcd6da4,873c2573-bd0a-432b-bb0a-d284fdc1ce61,db2a483d-bc32-47f4-aca5-029487a73e7a,f5cdf5da-61f1-42df-8f45-c48494d6dacd
|
||||||
|
74,Some Road,"",Cáceres,Spain,"",This is a description,Example Renovation Project,0,12345,87cbef9f-16e0-4c8c-8d56-3dc9384d9cdf,Extremadura,0,,14,5660466b-2600-4b9e-bbce-faf192cfc906,5988181d-9168-4831-9039-c7636baa0865,3d79d2c2-d9bc-49f4-9083-a0a681314834,68f19532-51e8-427f-8c77-fb644d1dd172
|
||||||
|
75,Schwachhauser Ring 100,"",Bremen,Germany,"",Windows should be replaced bz modern 3ßlazer windows,sept-15-brea-test,0,28359,ebc195fe-d774-49c6-8759-183b5d145b0e,Bremen,0,,6,c4cd356d-9d70-4952-8f18-39b3807c81df,188d4752-3dc8-4c6c-a211-7447e8126496,369fedd6-e65e-4ee0-8a60-4a011723871e,26f3b83b-a23c-42c2-a008-9b7072093462
|
||||||
|
76,Calle Herreros,"",Cáceres,Spain,"",Test EDEA Building,Test EDEA Building,0,10005,913a2d18-70db-4dc9-8c4c-afc2f21f0f5c,Extremadura,0,,28,1fc260f6-4713-4b06-a680-38614db8d956,37e0c186-10fb-4303-96c0-318dd74632a7,14ce4da1-b660-483d-8cb5-467436a529f4,8b685acb-e840-497a-bb40-8ceea5218818
|
||||||
|
77,Via delle Brecce Bianche 12,"",Ancona,Italy,"",Test_new_project,Test_new_project,0,60131 ,27ff39e9-570e-4d86-a9c9-229b7dccab59,Marche,0,,38,432c91b2-ff25-469f-a623-3f39d5ed67d2,a3833b66-54a4-45bd-a2c0-cfa767adeb09,8572cd4b-7d3b-47d6-b903-7ede83baf3eb,d97cd720-212a-4aa1-8b9c-5384005e917a
|
||||||
|
78,Av cid 116,"",Valencia,Spain,Ruben,Prueba,Test BREA 2,0,46018,f27a9530-1cd2-46fc-9aa2-45bcdf4dddb8,Valencia,1980,,15,7397b503-33ab-47ef-acbe-09edb2176ff3,dead7fc1-2ba9-450f-947a-79f143a7fe5d,1fb77130-2813-4612-9635-24cbb5b3b996,79ebb4d7-6c08-4e11-9e47-e781bab9ee22
|
||||||
|
79,Addresstr. 2,"",Aknashen,Armenia,"",description,Testproject,0,24987,7891a87f-f119-483f-aec4-24e6b14cfdee,Armavir Region,0,,19,8269294e-a9b0-483c-8abe-25dbda0656cf,6117b251-1d89-45fe-b209-5fdd649b8062,561eb2a1-7c87-45e0-b7aa-00f580eacd01,bc0233c8-3e60-4f89-85dc-b8b73b487992
|
||||||
|
82,stree of stp,"",Vila Real,Portugal,"",test prj,test prj,0,5000,e8ca4a31-59c5-462d-9d96-18dd59d9c786,Vila Real,0,36,39,f2b10aa9-536d-4968-ba7b-eaa98e717f43,11a32d85-870d-4e17-b1c8-15300616576f,da92d861-796e-46c5-ae72-909be54b1e90,218f6664-80bd-41f5-8346-e29137f0b25f
|
||||||
|
83,Calle Miraflores 44,"",Santa Cruz de Tenerife,Spain,"",Miraflores Building,Test Miraflores Building,0,38003,d3ae74bc-1119-4eb4-8016-d3ff6b6927dd,Canary Islands,0,,28,ed5db4a1-5560-4753-9c6b-4e4dc3168ddf,6513aa25-b153-4b11-9aac-2695a822c901,e640c845-9b93-4ea9-8bd8-2b8f0c0a40c9,bf16ab3f-2e34-473c-a770-3894d8be9ceb
|
||||||
|
84,TESTENCORE,OFFICE,Eibar,Spain,YO,TEST,TESTENCORE,300,20600,fac8feaf-a0d6-40be-b602-ea616378013a,Gipuzkoa,2022,,42,aa252dd8-4d8e-45ec-ab5e-a531085c2459,cff5c784-2cf3-4979-ae27-cc69ac4bcdcb,e893b9a4-bb71-4c1f-9872-f178fcfbbf54,6b8cceb7-ddcf-42c0-97b1-c3b547ac7cb0
|
||||||
|
85,viale petrocchi,Condominio Plurifamiliare,Algete,Spain,Miriam,Edificio plurifamiliare,DIAEE_P1,35,17,027af6c1-1eff-4ddf-a6dc-6fd8fc6ce008,Madrid,1980,,43,6aff4eb5-c6f3-4a3a-8052-0e9d2525969d,0b2b25c2-d872-4b5a-ab32-47d3089f4c65,861df18d-eab0-45dc-895f-610d93ba4f1d,fcfe99b1-a3b2-408b-998c-dc28b52d1e1c
|
||||||
|
86,paris,"",Chartrettes,France,"",renovation,testttttttttt,0,54000,ee8d7daa-635c-4765-9845-4826dd58e27e,Île-de-France,0,40,45,e4909d99-cf20-4334-835b-598c66ee2ce1,5bc72ef7-b913-4f84-b337-89a4a90e4374,6d352e5f-3b24-41c4-948a-c8cb03a68599,eca30c74-ff53-4209-a470-c1ad7a3b72ea
|
||||||
|
893
jupyter/src/folder-compare.ipynb
Normal file
893
jupyter/src/folder-compare.ipynb
Normal file
@@ -0,0 +1,893 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "53445797-52c4-4443-8095-889cf1c24298",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%load_ext autoreload\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "4de950c8-cabe-4874-aa78-548342993b05",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%autoreload 1"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "bccbd4ee-e6e7-4bc5-b72c-98196e811766",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 27,
|
||||||
|
"id": "7b337190-7887-40b7-9d10-b8039f5f2dfd",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"project_df = pd.read_csv('encore-files/project.csv')\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"id": "44729805-deb3-4c61-b019-c9f52c5898f0",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"Index(['id', 'address', 'building_type', 'city', 'country', 'customer_name',\n",
|
||||||
|
" 'description', 'name', 'no_of_habitants', 'postal_code', 'projectuuid',\n",
|
||||||
|
" 'state', 'year_of_construction', 'bms_information_id',\n",
|
||||||
|
" 'project_coordinator_id', 'project_tag_id', 'ifc_before_tagid',\n",
|
||||||
|
" 'ifc_to_be_tagid', 'ifc_enhanced_tagid'],\n",
|
||||||
|
" dtype='object')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"project_df.columns"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"id": "561aa71c-c506-47b7-8b5f-6c00104a8e91",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"only_ifc_ids=project_df[['project_tag_id', 'c',\n",
|
||||||
|
" 'ifc_to_be_tagid', 'ifc_enhanced_tagid']]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "0af0d724-4dc5-434d-8209-e6604330c4e9",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"only_ifc_ids"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 15,
|
||||||
|
"id": "d1fe283a-35cd-4bf6-80ef-bd1a68ac2961",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def merge_cols(df):\n",
|
||||||
|
" df[\"all_ids\"] = df[\"project_tag_id\"] + df[\"project_tag_id\"] + df[\"ifc_to_be_tagid\"] + df[\"ifc_enhanced_tagid\"]\n",
|
||||||
|
" return df[[\"all_ids\"]]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 19,
|
||||||
|
"id": "bfdca4ba-edf0-4394-b20d-8ab04130a30f",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/tmp/ipykernel_98/3701186255.py:2: SettingWithCopyWarning: \n",
|
||||||
|
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||||
|
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||||
|
"\n",
|
||||||
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||||||
|
" df[\"all_ids\"] = df[\"project_tag_id\"] + df[\"project_tag_id\"] + df[\"ifc_to_be_tagid\"] + df[\"ifc_enhanced_tagid\"]\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"ids_from_database = merge_cols(only_ifc_ids)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 24,
|
||||||
|
"id": "b8aee40a-7cc9-41b0-a1ef-4c3ed293f229",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>all_ids</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>97a3e117-a4b9-463f-81ed-8a0fd6826b1297a3e117-a...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>b27ac00c-0b6f-4909-81db-ef8c136c1c28b27ac00c-0...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>5</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>6</th>\n",
|
||||||
|
" <td>d48ed765-b997-4998-b48f-d5c6f37764a2d48ed765-b...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>7</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>8</th>\n",
|
||||||
|
" <td>3fe6a2a0-dcf6-4108-b079-085e7817b9333fe6a2a0-d...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>9</th>\n",
|
||||||
|
" <td>4036063b-84bf-4312-883d-306648307e814036063b-8...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>10</th>\n",
|
||||||
|
" <td>6e36b3b6-1b35-46a0-89b4-c64b2dcfc4d26e36b3b6-1...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>11</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>12</th>\n",
|
||||||
|
" <td>6c886d15-2ddf-489e-8440-066ad5c0c0516c886d15-2...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>13</th>\n",
|
||||||
|
" <td>6ba02956-f25b-4a68-8c02-0d6308b4460f6ba02956-f...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>14</th>\n",
|
||||||
|
" <td>cc1e623d-ba33-4f84-8bfb-7d19d062cc20cc1e623d-b...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>15</th>\n",
|
||||||
|
" <td>fe04ff9b-c1ff-41c4-9377-b7df1cfe2d22fe04ff9b-c...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>16</th>\n",
|
||||||
|
" <td>aa0efa31-7f46-4511-bf25-1a14f724d43baa0efa31-7...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>17</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>18</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>19</th>\n",
|
||||||
|
" <td>5d768096-e9e0-4b2d-bd3b-5e48349fd4ca5d768096-e...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>20</th>\n",
|
||||||
|
" <td>bdd29bf3-c04a-4dd2-85b8-28195d167dffbdd29bf3-c...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>21</th>\n",
|
||||||
|
" <td>67001609-3c2d-4963-9838-b211b0f5010667001609-3...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>22</th>\n",
|
||||||
|
" <td>9747c628-6b97-4b21-a9b2-7a249afedafb9747c628-6...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>23</th>\n",
|
||||||
|
" <td>02cafcb1-98aa-4a7c-bf1e-ddf55a9d5fbe02cafcb1-9...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>24</th>\n",
|
||||||
|
" <td>ebff9dd8-e032-4bed-ab15-67279baf9604ebff9dd8-e...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>25</th>\n",
|
||||||
|
" <td>2aa3427a-5388-4bf7-8076-681f1b8ad5d42aa3427a-5...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>26</th>\n",
|
||||||
|
" <td>246a4471-9477-4adf-9ec5-42ad325733a6246a4471-9...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>27</th>\n",
|
||||||
|
" <td>337f2a2e-2826-4bc8-83c2-8c0459781c1d337f2a2e-2...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>28</th>\n",
|
||||||
|
" <td>7ac6f1a5-9ecb-4ab2-9fc1-7abad928544e7ac6f1a5-9...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>29</th>\n",
|
||||||
|
" <td>a258185a-f60c-452e-82e3-572a544f22cca258185a-f...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>30</th>\n",
|
||||||
|
" <td>e7bde166-4910-407b-9b1d-1943fbcd6da4e7bde166-4...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>31</th>\n",
|
||||||
|
" <td>5660466b-2600-4b9e-bbce-faf192cfc9065660466b-2...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>32</th>\n",
|
||||||
|
" <td>c4cd356d-9d70-4952-8f18-39b3807c81dfc4cd356d-9...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>33</th>\n",
|
||||||
|
" <td>1fc260f6-4713-4b06-a680-38614db8d9561fc260f6-4...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>34</th>\n",
|
||||||
|
" <td>432c91b2-ff25-469f-a623-3f39d5ed67d2432c91b2-f...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>35</th>\n",
|
||||||
|
" <td>7397b503-33ab-47ef-acbe-09edb2176ff37397b503-3...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>36</th>\n",
|
||||||
|
" <td>8269294e-a9b0-483c-8abe-25dbda0656cf8269294e-a...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>37</th>\n",
|
||||||
|
" <td>f2b10aa9-536d-4968-ba7b-eaa98e717f43f2b10aa9-5...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>38</th>\n",
|
||||||
|
" <td>ed5db4a1-5560-4753-9c6b-4e4dc3168ddfed5db4a1-5...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>39</th>\n",
|
||||||
|
" <td>aa252dd8-4d8e-45ec-ab5e-a531085c2459aa252dd8-4...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>40</th>\n",
|
||||||
|
" <td>6aff4eb5-c6f3-4a3a-8052-0e9d2525969d6aff4eb5-c...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>41</th>\n",
|
||||||
|
" <td>e4909d99-cf20-4334-835b-598c66ee2ce1e4909d99-c...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" all_ids\n",
|
||||||
|
"0 97a3e117-a4b9-463f-81ed-8a0fd6826b1297a3e117-a...\n",
|
||||||
|
"1 NaN\n",
|
||||||
|
"2 NaN\n",
|
||||||
|
"3 NaN\n",
|
||||||
|
"4 b27ac00c-0b6f-4909-81db-ef8c136c1c28b27ac00c-0...\n",
|
||||||
|
"5 NaN\n",
|
||||||
|
"6 d48ed765-b997-4998-b48f-d5c6f37764a2d48ed765-b...\n",
|
||||||
|
"7 NaN\n",
|
||||||
|
"8 3fe6a2a0-dcf6-4108-b079-085e7817b9333fe6a2a0-d...\n",
|
||||||
|
"9 4036063b-84bf-4312-883d-306648307e814036063b-8...\n",
|
||||||
|
"10 6e36b3b6-1b35-46a0-89b4-c64b2dcfc4d26e36b3b6-1...\n",
|
||||||
|
"11 NaN\n",
|
||||||
|
"12 6c886d15-2ddf-489e-8440-066ad5c0c0516c886d15-2...\n",
|
||||||
|
"13 6ba02956-f25b-4a68-8c02-0d6308b4460f6ba02956-f...\n",
|
||||||
|
"14 cc1e623d-ba33-4f84-8bfb-7d19d062cc20cc1e623d-b...\n",
|
||||||
|
"15 fe04ff9b-c1ff-41c4-9377-b7df1cfe2d22fe04ff9b-c...\n",
|
||||||
|
"16 aa0efa31-7f46-4511-bf25-1a14f724d43baa0efa31-7...\n",
|
||||||
|
"17 NaN\n",
|
||||||
|
"18 NaN\n",
|
||||||
|
"19 5d768096-e9e0-4b2d-bd3b-5e48349fd4ca5d768096-e...\n",
|
||||||
|
"20 bdd29bf3-c04a-4dd2-85b8-28195d167dffbdd29bf3-c...\n",
|
||||||
|
"21 67001609-3c2d-4963-9838-b211b0f5010667001609-3...\n",
|
||||||
|
"22 9747c628-6b97-4b21-a9b2-7a249afedafb9747c628-6...\n",
|
||||||
|
"23 02cafcb1-98aa-4a7c-bf1e-ddf55a9d5fbe02cafcb1-9...\n",
|
||||||
|
"24 ebff9dd8-e032-4bed-ab15-67279baf9604ebff9dd8-e...\n",
|
||||||
|
"25 2aa3427a-5388-4bf7-8076-681f1b8ad5d42aa3427a-5...\n",
|
||||||
|
"26 246a4471-9477-4adf-9ec5-42ad325733a6246a4471-9...\n",
|
||||||
|
"27 337f2a2e-2826-4bc8-83c2-8c0459781c1d337f2a2e-2...\n",
|
||||||
|
"28 7ac6f1a5-9ecb-4ab2-9fc1-7abad928544e7ac6f1a5-9...\n",
|
||||||
|
"29 a258185a-f60c-452e-82e3-572a544f22cca258185a-f...\n",
|
||||||
|
"30 e7bde166-4910-407b-9b1d-1943fbcd6da4e7bde166-4...\n",
|
||||||
|
"31 5660466b-2600-4b9e-bbce-faf192cfc9065660466b-2...\n",
|
||||||
|
"32 c4cd356d-9d70-4952-8f18-39b3807c81dfc4cd356d-9...\n",
|
||||||
|
"33 1fc260f6-4713-4b06-a680-38614db8d9561fc260f6-4...\n",
|
||||||
|
"34 432c91b2-ff25-469f-a623-3f39d5ed67d2432c91b2-f...\n",
|
||||||
|
"35 7397b503-33ab-47ef-acbe-09edb2176ff37397b503-3...\n",
|
||||||
|
"36 8269294e-a9b0-483c-8abe-25dbda0656cf8269294e-a...\n",
|
||||||
|
"37 f2b10aa9-536d-4968-ba7b-eaa98e717f43f2b10aa9-5...\n",
|
||||||
|
"38 ed5db4a1-5560-4753-9c6b-4e4dc3168ddfed5db4a1-5...\n",
|
||||||
|
"39 aa252dd8-4d8e-45ec-ab5e-a531085c2459aa252dd8-4...\n",
|
||||||
|
"40 6aff4eb5-c6f3-4a3a-8052-0e9d2525969d6aff4eb5-c...\n",
|
||||||
|
"41 e4909d99-cf20-4334-835b-598c66ee2ce1e4909d99-c..."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 24,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"ids_from_database"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 17,
|
||||||
|
"id": "f52b6cc0-e0ea-479b-a24e-d4bcfa769bd2",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"converted_files_df = pd.read_csv('encore-files/filenames-only.csv')\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 26,
|
||||||
|
"id": "da181f14-a495-4950-b4f4-cba92b099184",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>foldername</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238590</th>\n",
|
||||||
|
" <td>fffb5e51-74ec-4829-8074-fc095346c8a6</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238591</th>\n",
|
||||||
|
" <td>fffc44dd-2c8d-4d71-b08e-e192e7ad9b37</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238592</th>\n",
|
||||||
|
" <td>fffc79c6-5f47-4641-87c6-57cc28478f73</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238593</th>\n",
|
||||||
|
" <td>fffcc424-c740-46da-b57c-cc97be249524</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238594</th>\n",
|
||||||
|
" <td>fffcda08-0da5-4c2a-ab94-fd49477d18dc</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238595</th>\n",
|
||||||
|
" <td>fffd27c4-ca0b-4067-96d8-848f50a43224</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238596</th>\n",
|
||||||
|
" <td>fffdab41-f136-4eaa-b870-68aff14e0763</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238597</th>\n",
|
||||||
|
" <td>fffdedf2-3063-4da8-b7da-cfd7eb76cf90</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238598</th>\n",
|
||||||
|
" <td>fffdf3d8-7069-4c19-90b5-d96d7459b54d</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238599</th>\n",
|
||||||
|
" <td>fffe0eff-dd7e-4dbb-9f71-29401c553e25</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238600</th>\n",
|
||||||
|
" <td>ffff29a7-ae72-4620-9abf-e3919bd23fe5</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238601</th>\n",
|
||||||
|
" <td>ffff502d-7393-4425-898d-21fefd5bc0e3</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238602</th>\n",
|
||||||
|
" <td>ffff5b42-575a-48c0-8093-61db051f903b</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238603</th>\n",
|
||||||
|
" <td>ffff83bb-bbc8-43c1-a219-e35a5b2e28bd</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238604</th>\n",
|
||||||
|
" <td>ffffc0e5-0a22-41b1-9d50-c532e2c313aa</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238605</th>\n",
|
||||||
|
" <td>filenames-only.txt</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238606</th>\n",
|
||||||
|
" <td>filenames.txt</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238607</th>\n",
|
||||||
|
" <td>files.txt</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238608</th>\n",
|
||||||
|
" <td>files2.txt</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>238609</th>\n",
|
||||||
|
" <td>index.json</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" foldername\n",
|
||||||
|
"238590 fffb5e51-74ec-4829-8074-fc095346c8a6\n",
|
||||||
|
"238591 fffc44dd-2c8d-4d71-b08e-e192e7ad9b37\n",
|
||||||
|
"238592 fffc79c6-5f47-4641-87c6-57cc28478f73\n",
|
||||||
|
"238593 fffcc424-c740-46da-b57c-cc97be249524\n",
|
||||||
|
"238594 fffcda08-0da5-4c2a-ab94-fd49477d18dc\n",
|
||||||
|
"238595 fffd27c4-ca0b-4067-96d8-848f50a43224\n",
|
||||||
|
"238596 fffdab41-f136-4eaa-b870-68aff14e0763\n",
|
||||||
|
"238597 fffdedf2-3063-4da8-b7da-cfd7eb76cf90\n",
|
||||||
|
"238598 fffdf3d8-7069-4c19-90b5-d96d7459b54d\n",
|
||||||
|
"238599 fffe0eff-dd7e-4dbb-9f71-29401c553e25\n",
|
||||||
|
"238600 ffff29a7-ae72-4620-9abf-e3919bd23fe5\n",
|
||||||
|
"238601 ffff502d-7393-4425-898d-21fefd5bc0e3\n",
|
||||||
|
"238602 ffff5b42-575a-48c0-8093-61db051f903b\n",
|
||||||
|
"238603 ffff83bb-bbc8-43c1-a219-e35a5b2e28bd\n",
|
||||||
|
"238604 ffffc0e5-0a22-41b1-9d50-c532e2c313aa\n",
|
||||||
|
"238605 filenames-only.txt\n",
|
||||||
|
"238606 filenames.txt\n",
|
||||||
|
"238607 files.txt\n",
|
||||||
|
"238608 files2.txt\n",
|
||||||
|
"238609 index.json"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 26,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"converted_files_df.tail(20)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "fd2c24b7-ddb1-4d77-9a38-7e11599e91c5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"converted_files_df['foldername']"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 22,
|
||||||
|
"id": "2fe47979-1a5b-41b4-9b11-5cb4616e6fd8",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def get_diff_for_two_dataframes(df1, df2):\n",
|
||||||
|
" \"\"\"Returns set of values unique to df1\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" diff = df1[df1['all_ids'].isin(df2['foldername']) == False]\n",
|
||||||
|
" return diff"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 25,
|
||||||
|
"id": "5f5a9b4e-91e0-4cbf-8f31-e5cda02d88dc",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>all_ids</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>97a3e117-a4b9-463f-81ed-8a0fd6826b1297a3e117-a...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>b27ac00c-0b6f-4909-81db-ef8c136c1c28b27ac00c-0...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>5</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>6</th>\n",
|
||||||
|
" <td>d48ed765-b997-4998-b48f-d5c6f37764a2d48ed765-b...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>7</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>8</th>\n",
|
||||||
|
" <td>3fe6a2a0-dcf6-4108-b079-085e7817b9333fe6a2a0-d...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>9</th>\n",
|
||||||
|
" <td>4036063b-84bf-4312-883d-306648307e814036063b-8...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>10</th>\n",
|
||||||
|
" <td>6e36b3b6-1b35-46a0-89b4-c64b2dcfc4d26e36b3b6-1...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>11</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>12</th>\n",
|
||||||
|
" <td>6c886d15-2ddf-489e-8440-066ad5c0c0516c886d15-2...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>13</th>\n",
|
||||||
|
" <td>6ba02956-f25b-4a68-8c02-0d6308b4460f6ba02956-f...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>14</th>\n",
|
||||||
|
" <td>cc1e623d-ba33-4f84-8bfb-7d19d062cc20cc1e623d-b...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>15</th>\n",
|
||||||
|
" <td>fe04ff9b-c1ff-41c4-9377-b7df1cfe2d22fe04ff9b-c...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>16</th>\n",
|
||||||
|
" <td>aa0efa31-7f46-4511-bf25-1a14f724d43baa0efa31-7...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>17</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>18</th>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>19</th>\n",
|
||||||
|
" <td>5d768096-e9e0-4b2d-bd3b-5e48349fd4ca5d768096-e...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>20</th>\n",
|
||||||
|
" <td>bdd29bf3-c04a-4dd2-85b8-28195d167dffbdd29bf3-c...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>21</th>\n",
|
||||||
|
" <td>67001609-3c2d-4963-9838-b211b0f5010667001609-3...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>22</th>\n",
|
||||||
|
" <td>9747c628-6b97-4b21-a9b2-7a249afedafb9747c628-6...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>23</th>\n",
|
||||||
|
" <td>02cafcb1-98aa-4a7c-bf1e-ddf55a9d5fbe02cafcb1-9...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>24</th>\n",
|
||||||
|
" <td>ebff9dd8-e032-4bed-ab15-67279baf9604ebff9dd8-e...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>25</th>\n",
|
||||||
|
" <td>2aa3427a-5388-4bf7-8076-681f1b8ad5d42aa3427a-5...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>26</th>\n",
|
||||||
|
" <td>246a4471-9477-4adf-9ec5-42ad325733a6246a4471-9...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>27</th>\n",
|
||||||
|
" <td>337f2a2e-2826-4bc8-83c2-8c0459781c1d337f2a2e-2...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>28</th>\n",
|
||||||
|
" <td>7ac6f1a5-9ecb-4ab2-9fc1-7abad928544e7ac6f1a5-9...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>29</th>\n",
|
||||||
|
" <td>a258185a-f60c-452e-82e3-572a544f22cca258185a-f...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>30</th>\n",
|
||||||
|
" <td>e7bde166-4910-407b-9b1d-1943fbcd6da4e7bde166-4...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>31</th>\n",
|
||||||
|
" <td>5660466b-2600-4b9e-bbce-faf192cfc9065660466b-2...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>32</th>\n",
|
||||||
|
" <td>c4cd356d-9d70-4952-8f18-39b3807c81dfc4cd356d-9...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>33</th>\n",
|
||||||
|
" <td>1fc260f6-4713-4b06-a680-38614db8d9561fc260f6-4...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>34</th>\n",
|
||||||
|
" <td>432c91b2-ff25-469f-a623-3f39d5ed67d2432c91b2-f...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>35</th>\n",
|
||||||
|
" <td>7397b503-33ab-47ef-acbe-09edb2176ff37397b503-3...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>36</th>\n",
|
||||||
|
" <td>8269294e-a9b0-483c-8abe-25dbda0656cf8269294e-a...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>37</th>\n",
|
||||||
|
" <td>f2b10aa9-536d-4968-ba7b-eaa98e717f43f2b10aa9-5...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>38</th>\n",
|
||||||
|
" <td>ed5db4a1-5560-4753-9c6b-4e4dc3168ddfed5db4a1-5...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>39</th>\n",
|
||||||
|
" <td>aa252dd8-4d8e-45ec-ab5e-a531085c2459aa252dd8-4...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>40</th>\n",
|
||||||
|
" <td>6aff4eb5-c6f3-4a3a-8052-0e9d2525969d6aff4eb5-c...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>41</th>\n",
|
||||||
|
" <td>e4909d99-cf20-4334-835b-598c66ee2ce1e4909d99-c...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" all_ids\n",
|
||||||
|
"0 97a3e117-a4b9-463f-81ed-8a0fd6826b1297a3e117-a...\n",
|
||||||
|
"1 NaN\n",
|
||||||
|
"2 NaN\n",
|
||||||
|
"3 NaN\n",
|
||||||
|
"4 b27ac00c-0b6f-4909-81db-ef8c136c1c28b27ac00c-0...\n",
|
||||||
|
"5 NaN\n",
|
||||||
|
"6 d48ed765-b997-4998-b48f-d5c6f37764a2d48ed765-b...\n",
|
||||||
|
"7 NaN\n",
|
||||||
|
"8 3fe6a2a0-dcf6-4108-b079-085e7817b9333fe6a2a0-d...\n",
|
||||||
|
"9 4036063b-84bf-4312-883d-306648307e814036063b-8...\n",
|
||||||
|
"10 6e36b3b6-1b35-46a0-89b4-c64b2dcfc4d26e36b3b6-1...\n",
|
||||||
|
"11 NaN\n",
|
||||||
|
"12 6c886d15-2ddf-489e-8440-066ad5c0c0516c886d15-2...\n",
|
||||||
|
"13 6ba02956-f25b-4a68-8c02-0d6308b4460f6ba02956-f...\n",
|
||||||
|
"14 cc1e623d-ba33-4f84-8bfb-7d19d062cc20cc1e623d-b...\n",
|
||||||
|
"15 fe04ff9b-c1ff-41c4-9377-b7df1cfe2d22fe04ff9b-c...\n",
|
||||||
|
"16 aa0efa31-7f46-4511-bf25-1a14f724d43baa0efa31-7...\n",
|
||||||
|
"17 NaN\n",
|
||||||
|
"18 NaN\n",
|
||||||
|
"19 5d768096-e9e0-4b2d-bd3b-5e48349fd4ca5d768096-e...\n",
|
||||||
|
"20 bdd29bf3-c04a-4dd2-85b8-28195d167dffbdd29bf3-c...\n",
|
||||||
|
"21 67001609-3c2d-4963-9838-b211b0f5010667001609-3...\n",
|
||||||
|
"22 9747c628-6b97-4b21-a9b2-7a249afedafb9747c628-6...\n",
|
||||||
|
"23 02cafcb1-98aa-4a7c-bf1e-ddf55a9d5fbe02cafcb1-9...\n",
|
||||||
|
"24 ebff9dd8-e032-4bed-ab15-67279baf9604ebff9dd8-e...\n",
|
||||||
|
"25 2aa3427a-5388-4bf7-8076-681f1b8ad5d42aa3427a-5...\n",
|
||||||
|
"26 246a4471-9477-4adf-9ec5-42ad325733a6246a4471-9...\n",
|
||||||
|
"27 337f2a2e-2826-4bc8-83c2-8c0459781c1d337f2a2e-2...\n",
|
||||||
|
"28 7ac6f1a5-9ecb-4ab2-9fc1-7abad928544e7ac6f1a5-9...\n",
|
||||||
|
"29 a258185a-f60c-452e-82e3-572a544f22cca258185a-f...\n",
|
||||||
|
"30 e7bde166-4910-407b-9b1d-1943fbcd6da4e7bde166-4...\n",
|
||||||
|
"31 5660466b-2600-4b9e-bbce-faf192cfc9065660466b-2...\n",
|
||||||
|
"32 c4cd356d-9d70-4952-8f18-39b3807c81dfc4cd356d-9...\n",
|
||||||
|
"33 1fc260f6-4713-4b06-a680-38614db8d9561fc260f6-4...\n",
|
||||||
|
"34 432c91b2-ff25-469f-a623-3f39d5ed67d2432c91b2-f...\n",
|
||||||
|
"35 7397b503-33ab-47ef-acbe-09edb2176ff37397b503-3...\n",
|
||||||
|
"36 8269294e-a9b0-483c-8abe-25dbda0656cf8269294e-a...\n",
|
||||||
|
"37 f2b10aa9-536d-4968-ba7b-eaa98e717f43f2b10aa9-5...\n",
|
||||||
|
"38 ed5db4a1-5560-4753-9c6b-4e4dc3168ddfed5db4a1-5...\n",
|
||||||
|
"39 aa252dd8-4d8e-45ec-ab5e-a531085c2459aa252dd8-4...\n",
|
||||||
|
"40 6aff4eb5-c6f3-4a3a-8052-0e9d2525969d6aff4eb5-c...\n",
|
||||||
|
"41 e4909d99-cf20-4334-835b-598c66ee2ce1e4909d99-c..."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 25,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"get_diff_for_two_dataframes_by_single_col(ids_from_database, converted_files_df)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "ee2080ff-3a50-4170-894e-c617509ac578",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
" - length of array is the same, so no ifc files from the database are included there\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "0af46bf5-95f2-4fbf-af03-e2ba0f94c1f9",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "47cd4a1f-96b9-4cdd-9d50-9fcb9a1000e2",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user