diff --git a/contrib/segmentation/README.md b/contrib/segmentation/README.md new file mode 100644 index 000000000..f883fa023 --- /dev/null +++ b/contrib/segmentation/README.md @@ -0,0 +1,15 @@ +# Semantic Segmentation using PyTorch and Azure Machine Learning + +This subproject contains a production-ready training pipeline for a semantic segmentation model using PyTorch and Azure Machine Learning. + +## Installation + +To install the Azure ML CLI v2, [follow these instructions](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-configure-cli). + +To install the last known working set of Python dependencies, run + +```bash +pip install -r requirements.txt +``` + +Note that this project uses [pip-tools](https://github.com/jazzband/pip-tools) to manage its dependencies. Direct dependencies are declared in `requirements.in`; upgrading them to versions newer than those pinned in `requirements.txt` is done at your own risk. diff --git a/contrib/segmentation/config/__init__.py b/contrib/segmentation/config/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/segmentation/config/augmentation.py b/contrib/segmentation/config/augmentation.py new file mode 100644 index 000000000..d349e3e87 --- /dev/null +++ b/contrib/segmentation/config/augmentation.py @@ -0,0 +1,42 @@ +from typing import Tuple +import albumentations as A + + +def _preprocessing(patch_dim: Tuple[int, int] = (512, 512)): + transform = A.Compose( + [ + # This allows meaningful yet stochastic cropped views + A.CropNonEmptyMaskIfExists(patch_dim[0], patch_dim[1], p=1), + A.RandomRotate90(p=0.5), + A.HorizontalFlip(p=0.5), + A.VerticalFlip(p=0.5), + A.Blur(p=0.25), + A.ColorJitter(p=0.25), + A.GaussNoise(p=0.25), + A.CoarseDropout(p=0.5, max_holes=64, max_height=8, max_width=8), + A.RandomBrightnessContrast(p=0.25), + ], + ) + return transform + + +def _augmentation(patch_dim: Tuple[int, int] = (512, 512)): + transform = A.Compose( + [ + # This allows meaningful yet stochastic cropped views + A.CropNonEmptyMaskIfExists(patch_dim[0], patch_dim[1], p=1), + A.RandomRotate90(p=0.5), + A.HorizontalFlip(p=0.5), + A.VerticalFlip(p=0.5), + A.Blur(p=0.25), + A.ColorJitter(p=0.25), + A.GaussNoise(p=0.25), + A.CoarseDropout(p=0.5, max_holes=64, max_height=8, max_width=8), + A.RandomBrightnessContrast(p=0.25), + ], + ) + return transform + + +preprocessing = _preprocessing() +augmentation = _augmentation() diff --git a/contrib/segmentation/data/test_data/labels/aml_coco_labels.json b/contrib/segmentation/data/test_data/labels/aml_coco_labels.json new file mode 100644 index 000000000..bc4d97bff --- /dev/null +++ b/contrib/segmentation/data/test_data/labels/aml_coco_labels.json @@ -0,0 +1,1418 @@ +{ + "images": [ + { + "id": 1, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 2, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 3, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 4, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" +
}, + { + "id": 5, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 6, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 7, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 8, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 9, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 10, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + } + ], + "annotations": [ + { + "segmentation": [ + [ + 0.6568766457362771, + 0.4782633500684619, + 0.6578894065221794, + 0.4727863989046098, + 0.6619404496657889, + 0.4764376996805112 + ] + ], + "id": 1, + "category_id": 1, + "image_id": 1, + "area": 0.0, + "bbox": [ + 0.6568766457362771, + 0.4727863989046098, + 0.005063803929511779, + 0.005476951163852106 + ] + }, + { + "segmentation": [ + [ + 0.6022352510076951, + 0.46838012661712086, + 0.5850128252106999, + 0.48351913019543075, + 0.5883107365335287, + 0.48874896779521054, + 0.5958226456577501, + 0.48654693091109275, + 0.5985709050934408, + 0.48104183870079825, + 0.6035177720776841, + 0.4807665840902835, + 0.605533162330524, + 0.47828929259565095, + 0.6099303774276291, + 0.47471098265895956, + 0.6095639428362037, + 0.47058216350123866, + 0.6048002931476731, + 0.469205890448665 + ] + ], + "id": 2, + "category_id": 1, + "image_id": 2, + "area": 0.0, + "bbox": [ + 0.5850128252106999, + 0.46838012661712086, + 0.024917552216929262, + 0.02036884117808968 + ] + }, + { + "segmentation": [ + [ + 0.7492063492063492, + 0.4451820940819423, + 0.7230880230880231, + 0.4657760676349447, + 0.7252525252525253, + 0.47097875569044007, + 0.7500721500721501, + 0.45537069152395404, + 0.7556998556998556, + 0.4493008887925428 + ] + ], + "id": 3, + "category_id": 3, + "image_id": 2, + "area": 0.0, + "bbox": [ + 0.7230880230880231, + 0.4451820940819423, + 0.032611832611832536, + 0.02579666160849775 + ] + }, + { + "segmentation": [ + [ + 0.7158730158730159, + 0.36063841318014306, + 0.7103896103896103, + 0.3649739865597225, + 0.7115440115440116, + 0.3697431172772599, + 0.7173160173160174, + 0.3651907652287015 + ] + ], + "id": 4, + "category_id": 3, + "image_id": 2, + "area": 0.0, + "bbox": [ + 0.7103896103896103, + 0.36063841318014306, + 0.006926406926407003, + 0.009104704097116834 + ] + }, + { + "segmentation": [ + [ + 0.6385281385281385, + 0.1533980056362454, + 0.6313131313131313, + 0.15751680034684587, + 0.6375180375180375, + 0.15838391502276175, + 0.6415584415584416, + 0.15599934966399306 + ] + ], + "id": 5, + "category_id": 1, + "image_id": 2, + "area": 0.0, + "bbox": [ + 0.6313131313131313, + 0.1533980056362454, + 0.01024531024531028, + 0.004985909386516357 + ] + }, + { + "segmentation": [ + [ + 0.7059362403810919, + 0.541597853014038, + 0.7019054598754122, + 
0.5314134324249932, + 0.7042872847196775, + 0.5300371593724195, + 0.7083180652253572, + 0.5308629232039637 + ] + ], + "id": 6, + "category_id": 1, + "image_id": 3, + "area": 0.0, + "bbox": [ + 0.7019054598754122, + 0.5300371593724195, + 0.006412605349945033, + 0.011560693641618491 + ] + }, + { + "segmentation": [ + [ + 0.7128984976181751, + 0.546552436003303, + 0.7139978013924514, + 0.553709055876686, + 0.7169292781238549, + 0.5553605835397742, + 0.7182117991938439, + 0.5504060005505093 + ] + ], + "id": 7, + "category_id": 1, + "image_id": 3, + "area": 0.0, + "bbox": [ + 0.7128984976181751, + 0.546552436003303, + 0.005313301575668783, + 0.00880814753647119 + ] + }, + { + "segmentation": [ + [ + 0.8145840967387321, + 0.13779933938893477, + 0.8169659215829974, + 0.14495595926231764, + 0.8198973983144009, + 0.13862510322047894 + ] + ], + "id": 8, + "category_id": 1, + "image_id": 3, + "area": 0.0, + "bbox": [ + 0.8145840967387321, + 0.13779933938893477, + 0.005313301575668783, + 0.007156619873382869 + ] + }, + { + "segmentation": [ + [ + 0.8145743145743146, + 0.6888413180143074, + 0.810966810966811, + 0.6981628007804032, + 0.8092352092352092, + 0.7050997181877303, + 0.8131313131313131, + 0.7040158248428354, + 0.8181818181818182, + 0.6955614567526556, + 0.8184704184704185, + 0.6905755473661391 + ] + ], + "id": 9, + "category_id": 1, + "image_id": 4, + "area": 0.0, + "bbox": [ + 0.8092352092352092, + 0.6888413180143074, + 0.009235209235209263, + 0.01625840017342295 + ] + }, + { + "segmentation": [ + [ + 0.8062049062049063, + 0.6832050726208541, + 0.7968253968253968, + 0.6927433340559289, + 0.7937950937950938, + 0.6964285714285714, + 0.7935064935064935, + 0.7027151528289616, + 0.796969696969697, + 0.7027151528289616, + 0.8037518037518038, + 0.6949111207457186, + 0.8075036075036075, + 0.6901419900281812, + 0.8111111111111111, + 0.6834218512898331 + ] + ], + "id": 10, + "category_id": 1, + "image_id": 4, + "area": 0.0, + "bbox": [ + 0.7935064935064935, + 0.6832050726208541, + 0.017604617604617623, + 0.019510080208107516 + ] + }, + { + "segmentation": [ + [ + 0.47051442910915936, + 0.524170750094233, + 0.46925972396486826, + 0.54188654353562, + 0.48030112923462986, + 0.5335940444779494 + ] + ], + "id": 11, + "category_id": 2, + "image_id": 5, + "area": 0.0, + "bbox": [ + 0.46925972396486826, + 0.524170750094233, + 0.011041405269761606, + 0.017715793441387073 + ] + }, + { + "segmentation": [ + [ + 0.47051442910915936, + 0.5136166603844704, + 0.47076537013801756, + 0.520778364116095, + 0.4780426599749059, + 0.5222860912174896 + ] + ], + "id": 12, + "category_id": 1, + "image_id": 5, + "area": 0.0, + "bbox": [ + 0.47051442910915936, + 0.5136166603844704, + 0.007528230865746555, + 0.008669430833019187 + ] + }, + { + "segmentation": [ + [ + 0.2895739109735777, + 0.5819104237439656, + 0.28909783384908355, + 0.588168246021813, + 0.2912401809093073, + 0.5897774003218309, + 0.29254939300166627, + 0.5831619881995351 + ] + ], + "id": 13, + "category_id": 1, + "image_id": 6, + "area": 0.0, + "bbox": [ + 0.28909783384908355, + 0.5819104237439656, + 0.003451559152582717, + 0.007866976577865281 + ] + }, + { + "segmentation": [ + [ + 0.287669602475601, + 0.5978231718219202, + 0.2877886217567246, + 0.6051537636331128, + 0.28933587241133063, + 0.6064053280886823, + 0.290288026660319, + 0.6026506347219739, + 0.2904070459414425, + 0.5981807616663687 + ] + ], + "id": 14, + "category_id": 1, + "image_id": 6, + "area": 0.0, + "bbox": [ + 0.287669602475601, + 0.5978231718219202, + 0.0027374434658414826, + 
0.008582156266762064 + ] + }, + { + "segmentation": [ + [ + 0.263237139272271, + 0.7736995853750471, + 0.26348808030112925, + 0.7823690162080663, + 0.2685069008782936, + 0.7767150395778364 + ] + ], + "id": 15, + "category_id": 2, + "image_id": 7, + "area": 0.0, + "bbox": [ + 0.263237139272271, + 0.7736995853750471, + 0.005269761606022605, + 0.008669430833019187 + ] + }, + { + "segmentation": [ + [ + 0.2767879548306148, + 0.7194214097248398, + 0.27603513174404015, + 0.725829249905767, + 0.2810539523212045, + 0.7216830003769318 + ] + ], + "id": 16, + "category_id": 2, + "image_id": 7, + "area": 0.0, + "bbox": [ + 0.27603513174404015, + 0.7194214097248398, + 0.005018820577164351, + 0.006407840180927216 + ] + }, + { + "segmentation": [ + [ + 0.3541590326126786, + 0.0037503440682631435, + 0.3594723341883474, + 0.018063583815028903, + 0.36386954928545256, + 0.04779108175061932, + 0.37303041407108833, + 0.07504128819157721, + 0.3799926713081715, + 0.10559454995871181, + 0.38567240747526566, + 0.13201899256812552, + 0.39171857823378525, + 0.1592691990090834, + 0.39538292414803955, + 0.17550922102945224, + 0.395749358739465, + 0.18184007707129093, + 0.40344448515939907, + 0.20716350123864574, + 0.4080249175522169, + 0.22643132397467658, + 0.41260534994503484, + 0.24707541976328104, + 0.4171857823378527, + 0.2668937517203413, + 0.42176621473067055, + 0.2679947701624002, + 0.4146207401978747, + 0.23441370767960362, + 0.3972150971051667, + 0.1633980181668043, + 0.3836570172224258, + 0.10256674924304982, + 0.3788933675338952, + 0.08467519955959263 + ] + ], + "id": 17, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.3541590326126786, + 0.0037503440682631435, + 0.06760718211799194, + 0.26424442609413706 + ] + }, + { + "segmentation": [ + [ + 0.4173689996335654, + 0.2696462978254886, + 0.42396482227922316, + 0.2957954858243876, + 0.42891168926346646, + 0.3257982383704927, + 0.4351410773176988, + 0.3530484448114506, + 0.44082081348479296, + 0.3734172859895403, + 0.4430194210333456, + 0.38360170657858517, + 0.4424697691462074, + 0.39598816405174786, + 0.4483327226090143, + 0.4174580236718965, + 0.4518138512275559, + 0.4301197357555739, + 0.4525467204104067, + 0.4317712634186623, + 0.4567607182117992, + 0.4287434627030003, + 0.4424697691462074, + 0.3632328654004955, + 0.42634664712348846, + 0.28698733828791634, + 0.42176621473067055, + 0.26827002477291495, + 0.41993404177354343, + 0.26909578860445915 + ] + ], + "id": 18, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.4173689996335654, + 0.26827002477291495, + 0.039391718578233814, + 0.16350123864574734 + ] + }, + { + "segmentation": [ + [ + 0.48149505313301577, + 0.002374071015689513, + 0.49835104433858557, + 0.06155381227635563, + 0.5043972150971051, + 0.07834434351775392, + 0.5093440820813485, + 0.07366501513900357, + 0.5042139978013924, + 0.058250756950178915, + 0.49230487358006597, + 0.016412056151940545, + 0.48717478930010993, + 0.0015483071841453345 + ] + ], + "id": 19, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.48149505313301577, + 0.0015483071841453345, + 0.027849028948332744, + 0.07679603633360858 + ] + }, + { + "segmentation": [ + [ + 0.5144741663613045, + 0.07228874208642995, + 0.5093440820813485, + 0.06926094137076796, + 0.5054965188713815, + 0.031551059730250484, + 0.5020153902528399, + 0.009530690889072392, + 0.5014657383657017, + 0.0026493256262042388, + 0.506412605349945, + 0.0026493256262042388, + 0.5111762550384756, + 0.040359207266721715 + ] + ], + "id": 20, + "category_id": 
3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.5014657383657017, + 0.0026493256262042388, + 0.013008427995602756, + 0.06963941646022571 + ] + }, + { + "segmentation": [ + [ + 0.5054965188713815, + 0.08082163501238646, + 0.5192378160498351, + 0.12678915496834572, + 0.5300476364968854, + 0.1592691990090834, + 0.5381091975082448, + 0.18266584090283514, + 0.5434224990839135, + 0.17853702174511424, + 0.5272993770611946, + 0.1276149187998899, + 0.5146573836570172, + 0.08715249105422516, + 0.5135580798827409, + 0.07146297825488578 + ] + ], + "id": 21, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.5054965188713815, + 0.07146297825488578, + 0.03792598021253202, + 0.11120286264794936 + ] + }, + { + "segmentation": [ + [ + 0.5386588493953829, + 0.18541838700798238, + 0.5505679736167094, + 0.23578998073217727, + 0.5588127519237817, + 0.2801059730250482, + 0.567607182117992, + 0.32221992843380126, + 0.5718211799193844, + 0.35167217175887694, + 0.5873946500549652, + 0.35249793559042114, + 0.567607182117992, + 0.27047206165703275, + 0.5505679736167094, + 0.20193366363886595, + 0.5439721509710517, + 0.18128956785026148, + 0.5401245877610846, + 0.17963804018717314 + ] + ], + "id": 22, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.5386588493953829, + 0.17963804018717314, + 0.048735800659582273, + 0.172859895403248 + ] + }, + { + "segmentation": [ + [ + 0.5716379626236717, + 0.3541494632535095, + 0.5786002198607548, + 0.3910335810624828, + 0.5807988274093074, + 0.4182837875034407, + 0.5798827409307439, + 0.44195568400770713, + 0.5853792598021254, + 0.4513143407652078, + 0.5861121289849762, + 0.46782961739609136, + 0.5866617808721143, + 0.48874896779521054, + 0.5903261267863686, + 0.5038879713735205, + 0.5943569072920484, + 0.5193022295623452, + 0.6251374129717845, + 0.5204032480044041, + 0.6020520337119825, + 0.4122281860721167, + 0.5864785635764016, + 0.3522226809799064 + ] + ], + "id": 23, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.5716379626236717, + 0.3522226809799064, + 0.053499450348112765, + 0.16818056702449768 + ] + }, + { + "segmentation": [ + [ + 0.5945401245877611, + 0.5212290118359483, + 0.6046170758519605, + 0.5572873658133773, + 0.611945767680469, + 0.5776562069914671, + 0.6134115060461708, + 0.5963735205064685, + 0.6174422865518505, + 0.6203206716212497, + 0.6218395016489556, + 0.6285783099366914, + 0.629351410773177, + 0.6343586567575007, + 0.6366801026016856, + 0.663260390861547, + 0.634481495053133, + 0.6150908340214699, + 0.6291681934774642, + 0.560590421139554, + 0.6245877610846464, + 0.521504266446463 + ] + ], + "id": 24, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.5945401245877611, + 0.5212290118359483, + 0.04213997801392455, + 0.1420313790255987 + ] + }, + { + "segmentation": [ + [ + 0.6366801026016856, + 0.6673892100192679, + 0.6449248809087578, + 0.708402146985962, + 0.6528032246244045, + 0.7430842279108175, + 0.6621473067057531, + 0.7631778144783925, + 0.6764382557713449, + 0.81602669969722, + 0.6806522535727373, + 0.8050165152766309, + 0.6610480029314767, + 0.7334503165428021, + 0.6386954928545254, + 0.6695912469033857 + ] + ], + "id": 25, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.6366801026016856, + 0.6673892100192679, + 0.04397215097105167, + 0.1486374896779521 + ] + }, + { + "segmentation": [ + [ + 0.6766214730670576, + 0.8185039911918525, + 0.6881641626969586, + 0.8663982934214148, + 0.6971418101868816, + 0.8994288466831819, + 0.6949432026383291, + 
0.9184214148086981, + 0.6954928545254672, + 0.9352119460500964, + 0.6993404177354342, + 0.9577828241123039, + 0.704653719311103, + 0.9875103220478944, + 0.7081348479296445, + 0.9990710156895128, + 0.7282887504580432, + 0.9974194880264244, + 0.7216929278123855, + 0.9679672447013488, + 0.704653719311103, + 0.9049339388934765, + 0.6839501648955661, + 0.8286884117808974, + 0.6804690362770246, + 0.8193297550233967 + ] + ], + "id": 26, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.6766214730670576, + 0.8185039911918525, + 0.051667277390985644, + 0.18056702449766038 + ] + }, + { + "segmentation": [ + [ + 0.6828508611212899, + 0.7849229287090559, + 0.6866984243312568, + 0.8185039911918525, + 0.6945767680469036, + 0.8185039911918525, + 0.6855991205569806, + 0.7846476740985412, + 0.684683034078417, + 0.7851981833195706 + ] + ], + "id": 27, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.6828508611212899, + 0.7846476740985412, + 0.011725906925613705, + 0.03385631709331127 + ] + }, + { + "segmentation": [ + [ + 0.6462074019787468, + 0.536643270024773, + 0.6440087944301942, + 0.498382879163226, + 0.6381458409673874, + 0.45709468758601707, + 0.6352143642359839, + 0.4328722818607212, + 0.6262367167460609, + 0.3794728874208643, + 0.6179919384389886, + 0.3073561794660061, + 0.6062660315133749, + 0.2498279658684283, + 0.5996702088677172, + 0.2107418111753372, + 0.5932576035177721, + 0.21376961189099916 + ] + ], + "id": 28, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.5932576035177721, + 0.2107418111753372, + 0.052949798460974695, + 0.3259014588494358 + ] + }, + { + "segmentation": [ + [ + 0.596555514840601, + 0.20716350123864574, + 0.5784170025650421, + 0.08054638040187173, + 0.5754855258336387, + 0.038432424993118636, + 0.5742030047636497, + 0.028523259014588496, + 0.572737266397948, + 0.005952380952380952, + 0.5775009160864786, + 0.002374071015689513, + 0.5795163063393184, + 0.03182631434076521, + 0.58299743495786, + 0.07091246903385631, + 0.585562477097838, + 0.0959606385906964, + 0.5894100403078051, + 0.13532204789430222, + 0.5950897764748992, + 0.1790875309661437 + ] + ], + "id": 29, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.572737266397948, + 0.002374071015689513, + 0.02381824844265301, + 0.20478943022295623 + ] + }, + { + "segmentation": [ + [ + 0.7957127152803225, + 0.02301816680429397, + 0.7854525467204104, + 0.03320258739333884, + 0.7751923781604984, + 0.051919900908340214, + 0.7618175155734701, + 0.07476603358106249, + 0.7539391718578233, + 0.10256674924304982, + 0.7467936973250274, + 0.11990778970547757, + 0.7422132649322096, + 0.13339526562069914, + 0.7334188347379993, + 0.15569088907239195, + 0.7266397947966288, + 0.1782617671345995, + 0.7292048369366069, + 0.18541838700798238, + 0.7552216929278124, + 0.11935728048444812, + 0.7803224624404543, + 0.054672447013487474 + ] + ], + "id": 30, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.7266397947966288, + 0.02301816680429397, + 0.06907292048369373, + 0.1624002202036884 + ] + }, + { + "segmentation": [ + [ + 0.6780872114327593, + 0.10586980456922654, + 0.6696592158299743, + 0.15266308835672998, + 0.6665445218028582, + 0.17991329479768786, + 0.6656284353242946, + 0.2044109551334985, + 0.6707585196042506, + 0.2107418111753372, + 0.6733235617442287, + 0.16257225433526012, + 0.6786368633198974, + 0.11550371593724194 + ] + ], + "id": 31, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.6656284353242946, + 
0.10586980456922654, + 0.013008427995602756, + 0.10487200660611065 + ] + }, + { + "segmentation": [ + [ + 0.6612312202271895, + 0.13449628406275804, + 0.6562843532429461, + 0.19477704376548308, + 0.658299743495786, + 0.20936553812276357, + 0.6623305240014657, + 0.20798926507018992, + 0.6641626969585929, + 0.14000137627305256, + 0.6652620007328692, + 0.12293559042113955 + ] + ], + "id": 32, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.6562843532429461, + 0.12293559042113955, + 0.008977647489923135, + 0.08642994770162402 + ] + }, + { + "segmentation": [ + [ + 0.657017222425797, + 0.215971648775117, + 0.6604983510443386, + 0.34203826039086155, + 0.6630633931843166, + 0.33488164051747865, + 0.6619640894100403, + 0.21019130195430774 + ] + ], + "id": 33, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.657017222425797, + 0.21019130195430774, + 0.006046170758519653, + 0.1318469584365538 + ] + }, + { + "segmentation": [ + [ + 0.664712348845731, + 0.22285301403798513, + 0.6630633931843166, + 0.3123107624552711, + 0.6691095639428362, + 0.3511216625378475, + 0.6643459142543056, + 0.33378062207541975, + 0.6687431293514108, + 0.3313033305807872, + 0.6687431293514108, + 0.25037847508945776, + 0.6687431293514108, + 0.2107418111753372 + ] + ], + "id": 34, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.6630633931843166, + 0.2107418111753372, + 0.006046170758519542, + 0.1403798513625103 + ] + }, + { + "segmentation": [ + [ + 0.6604983510443386, + 0.35167217175887694, + 0.6542689629901063, + 0.3725915221579961, + 0.6568340051300843, + 0.3921345995045417, + 0.6641626969585929, + 0.4482865400495458, + 0.6725906925613778, + 0.4986581337737407, + 0.6764382557713449, + 0.49177676851087254, + 0.6617808721143276, + 0.36626066611615743 + ] + ], + "id": 35, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.6542689629901063, + 0.35167217175887694, + 0.02216929278123858, + 0.14698596201486375 + ] + }, + { + "segmentation": [ + [ + 0.6797361670941737, + 0.48654693091109275, + 0.6826676438255771, + 0.35662675474814204, + 0.682117991938439, + 0.33763418662262595, + 0.6771711249541956, + 0.336533168180567, + 0.6740564309270796, + 0.34121249655931735, + 0.6779039941370465, + 0.45186484998623727, + 0.6806522535727373, + 0.48764794935315164 + ] + ], + "id": 36, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.6740564309270796, + 0.336533168180567, + 0.008611212898497533, + 0.15111478117258464 + ] + }, + { + "segmentation": [ + [ + 0.6614144375229022, + 0.34809386182218555, + 0.6648955661414437, + 0.3684627030002753, + 0.6700256504213998, + 0.4006674924304982, + 0.669842433125687, + 0.4160817506193229, + 0.6722242579699523, + 0.443056702449766, + 0.6749725174056431, + 0.40837462152491055, + 0.6678270428728472, + 0.35580099091659784, + 0.6670941736899964, + 0.34947013487475914 + ] + ], + "id": 37, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.6614144375229022, + 0.34809386182218555, + 0.013558079882740937, + 0.09496284062758048 + ] + }, + { + "segmentation": [ + [ + 0.7685965555148406, + 0.8174029727497936, + 0.7847196775375596, + 0.9302573630608313, + 0.7929644558446317, + 0.9927401596476741, + 0.7990106266031514, + 0.9946669419212771, + 0.787651154268963, + 0.916494632535095, + 0.7722609014290949, + 0.8168524635287641 + ] + ], + "id": 38, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.7685965555148406, + 0.8168524635287641, + 0.030414071088310735, + 0.17781447839251308 + ] + }, + { + 
"segmentation": [ + [ + 0.8147673140344448, + 0.9963184695843655, + 0.8056064492488091, + 0.9971442334159097, + 0.7924148039574936, + 0.9189719240297275, + 0.7830707218761451, + 0.8201555188549409, + 0.7949798460974716, + 0.819054500412882, + 0.8010260168559912, + 0.9052091935039912, + 0.8103700989373397, + 0.9693435177539224 + ] + ], + "id": 39, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.7830707218761451, + 0.819054500412882, + 0.031696592158299675, + 0.17808973300302777 + ] + }, + { + "segmentation": [ + [ + 0.8246610480029315, + 0.8837393338838425, + 0.8250274825943569, + 0.9952174511423066, + 0.8329058263100036, + 0.9963184695843655, + 0.8292414803957494, + 0.8834640792733278 + ] + ], + "id": 40, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.8246610480029315, + 0.8834640792733278, + 0.008244778307072154, + 0.11285439031103772 + ] + }, + { + "segmentation": [ + [ + 0.8733968486625138, + 0.817953481970823, + 0.8558079882740931, + 0.887317643820534, + 0.8473799926713081, + 0.919522433250757, + 0.8470135580798828, + 0.9470478943022296, + 0.8433492121656284, + 0.9806289567850262, + 0.8446317332356175, + 0.9935659234792182, + 0.8592891168926347, + 0.913466831819433, + 0.8741297178453646, + 0.820981282686485 + ] + ], + "id": 41, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 0.8433492121656284, + 0.817953481970823, + 0.030780505679736225, + 0.17561244150839528 + ] + }, + { + "segmentation": [ + [ + 0.6476787954830615, + 0.9625424048247267, + 0.6486825595984943, + 0.9742272898605352, + 0.6521957340025094, + 0.9648039954768187 + ] + ], + "id": 42, + "category_id": 2, + "image_id": 9, + "area": 0.0, + "bbox": [ + 0.6476787954830615, + 0.9625424048247267, + 0.0045169385194478995, + 0.011684885035808446 + ] + }, + { + "segmentation": [ + [ + 0.7708908406524467, + 0.8539860535243121, + 0.7721455457967378, + 0.8634093479080287, + 0.7761606022584693, + 0.8554937806257068 + ] + ], + "id": 43, + "category_id": 2, + "image_id": 9, + "area": 0.0, + "bbox": [ + 0.7708908406524467, + 0.8539860535243121, + 0.005269761606022549, + 0.009423294383716585 + ] + }, + { + "segmentation": [ + [ + 0.5279799247176914, + 0.5053241613267998, + 0.5299874529485571, + 0.519647568790049, + 0.5385194479297365, + 0.5226630229928383, + 0.5392722710163111, + 0.5139935921598191, + 0.5365119196988708, + 0.5026856388993592 + ] + ], + "id": 44, + "category_id": 1, + "image_id": 10, + "area": 0.0, + "bbox": [ + 0.5279799247176914, + 0.5026856388993592, + 0.011292346298619749, + 0.019977384093479045 + ] + } + ], + "categories": [ + { + "id": 1, + "name": "Class 1" + }, + { + "id": 2, + "name": "Class 2" + }, + { + "id": 3, + "name": "Class 3" + }, + { + "id": 4, + "name": "Class 4" + } + ] +} \ No newline at end of file diff --git a/contrib/segmentation/data/test_data/labels/standard_coco_labels.json b/contrib/segmentation/data/test_data/labels/standard_coco_labels.json new file mode 100644 index 000000000..8237dc4ab --- /dev/null +++ b/contrib/segmentation/data/test_data/labels/standard_coco_labels.json @@ -0,0 +1,1418 @@ +{ + "images": [ + { + "id": 1, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 2, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 3, + "width": 5456, + 
"height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 4, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 5, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 6, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 7, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 8, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 9, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + }, + { + "id": 10, + "width": 5456, + "height": 3632, + "file_name": "test.jpg", + "coco_url": "test.jpg", + "absolute_url": "some_blob_path/test.jpg", + "date_captured": "2021-08-T12:04:52.3924957Z" + } + ], + "annotations": [ + { + "segmentation": [ + [ + 3584.0, + 1737.0, + 3589.0, + 1717.0, + 3612.0, + 1730.0 + ] + ], + "id": 1, + "category_id": 1, + "image_id": 1, + "area": 0.0, + "bbox": [ + 3583.918979137128, + 1717.1602008215427, + 3611.547093376544, + 1737.0524874486534 + ] + }, + { + "segmentation": [ + [ + 3286.0, + 1701.0, + 3192.0, + 1756.0, + 3210.0, + 1775.0, + 3251.0, + 1767.0, + 3266.0, + 1747.0, + 3293.0, + 1746.0, + 3304.0, + 1737.0, + 3328.0, + 1724.0, + 3326.0, + 1709.0, + 3300.0, + 1704.0 + ] + ], + "id": 2, + "category_id": 1, + "image_id": 2, + "area": 0.0, + "bbox": [ + 3191.8299743495786, + 1701.156619873383, + 3327.7801392451447, + 1775.1362510322047 + ] + }, + { + "segmentation": [ + [ + 4088.0, + 1617.0, + 3945.0, + 1692.0, + 3957.0, + 1711.0, + 4092.0, + 1654.0, + 4123.0, + 1632.0 + ] + ], + "id": 3, + "category_id": 3, + "image_id": 2, + "area": 0.0, + "bbox": [ + 3945.1682539682542, + 1616.9013657056146, + 4123.098412698412, + 1710.5948406676782 + ] + }, + { + "segmentation": [ + [ + 3906.0, + 1310.0, + 3876.0, + 1326.0, + 3882.0, + 1343.0, + 3914.0, + 1326.0 + ] + ], + "id": 4, + "category_id": 3, + "image_id": 2, + "area": 0.0, + "bbox": [ + 3875.885714285714, + 1309.8387166702796, + 3913.6761904761906, + 1342.907001951008 + ] + }, + { + "segmentation": [ + [ + 3484.0, + 557.0, + 3444.0, + 572.0, + 3478.0, + 575.0, + 3500.0, + 567.0 + ] + ], + "id": 5, + "category_id": 1, + "image_id": 2, + "area": 0.0, + "bbox": [ + 3444.4444444444443, + 557.1415564708433, + 3500.342857142857, + 575.2503793626706 + ] + }, + { + "segmentation": [ + [ + 3852.0, + 1967.0, + 3830.0, + 1930.0, + 3843.0, + 1925.0, + 3865.0, + 1928.0 + ] + ], + "id": 6, + "category_id": 1, + "image_id": 3, + "area": 0.0, + "bbox": [ + 3829.596189080249, + 1925.0949628406274, + 3864.583363869549, + 1967.083402146986 + ] + }, + { + "segmentation": [ + [ + 3890.0, + 1985.0, + 3896.0, + 2011.0, + 3912.0, + 2017.0, + 3919.0, + 1999.0 + ] + ], + "id": 7, + 
"category_id": 1, + "image_id": 3, + "area": 0.0, + "bbox": [ + 3889.5742030047636, + 1985.0784475639966, + 3918.5635764016124, + 2017.06963941646 + ] + }, + { + "segmentation": [ + [ + 4444.0, + 500.0, + 4457.0, + 526.0, + 4473.0, + 503.0 + ] + ], + "id": 8, + "category_id": 1, + "image_id": 3, + "area": 0.0, + "bbox": [ + 4444.370831806523, + 500.48720066061105, + 4473.360205203371, + 526.4800440407377 + ] + }, + { + "segmentation": [ + [ + 4444.0, + 2502.0, + 4425.0, + 2536.0, + 4415.0, + 2561.0, + 4436.0, + 2557.0, + 4464.0, + 2526.0, + 4466.0, + 2508.0 + ] + ], + "id": 9, + "category_id": 1, + "image_id": 4, + "area": 0.0, + "bbox": [ + 4415.187301587302, + 2501.8716670279646, + 4465.574603174603, + 2560.9221764578365 + ] + }, + { + "segmentation": [ + [ + 4399.0, + 2481.0, + 4347.0, + 2516.0, + 4331.0, + 2529.0, + 4329.0, + 2552.0, + 4348.0, + 2552.0, + 4385.0, + 2524.0, + 4406.0, + 2507.0, + 4425.0, + 2482.0 + ] + ], + "id": 10, + "category_id": 1, + "image_id": 4, + "area": 0.0, + "bbox": [ + 4329.371428571429, + 2481.400823758942, + 4425.422222222222, + 2552.2614350747886 + ] + }, + { + "segmentation": [ + [ + 2567.0, + 1904.0, + 2560.0, + 1968.0, + 2621.0, + 1938.0 + ] + ], + "id": 11, + "category_id": 2, + "image_id": 5, + "area": 0.0, + "bbox": [ + 2560.281053952321, + 1903.788164342254, + 2620.5229611041404, + 1968.131926121372 + ] + }, + { + "segmentation": [ + [ + 2567.0, + 1865.0, + 2568.0, + 1891.0, + 2608.0, + 1897.0 + ] + ], + "id": 12, + "category_id": 1, + "image_id": 5, + "area": 0.0, + "bbox": [ + 2567.1267252195735, + 1865.4557105163965, + 2608.2007528230865, + 1896.9430833019221 + ] + }, + { + "segmentation": [ + [ + 1580.0, + 2113.0, + 1577.0, + 2136.0, + 1589.0, + 2142.0, + 1596.0, + 2118.0 + ] + ], + "id": 13, + "category_id": 1, + "image_id": 6, + "area": 0.0, + "bbox": [ + 1577.3177814806, + 2113.498659038083, + 1596.1494882170912, + 2142.07151796889 + ] + }, + { + "segmentation": [ + [ + 1570.0, + 2171.0, + 1570.0, + 2198.0, + 1579.0, + 2202.0, + 1584.0, + 2189.0, + 1584.0, + 2173.0 + ] + ], + "id": 14, + "category_id": 1, + "image_id": 6, + "area": 0.0, + "bbox": [ + 1569.5253511068793, + 2171.2937600572145, + 1584.4608426565103, + 2202.464151618094 + ] + }, + { + "segmentation": [ + [ + 1436.0, + 2810.0, + 1438.0, + 2842.0, + 1465.0, + 2821.0 + ] + ], + "id": 15, + "category_id": 2, + "image_id": 7, + "area": 0.0, + "bbox": [ + 1436.2218318695104, + 2810.0768940821713, + 1464.9736511919698, + 2841.564266867697 + ] + }, + { + "segmentation": [ + [ + 1510.0, + 2613.0, + 1506.0, + 2636.0, + 1533.0, + 2621.0 + ] + ], + "id": 16, + "category_id": 2, + "image_id": 7, + "area": 0.0, + "bbox": [ + 1506.047678795483, + 2612.938560120618, + 1533.4303638644917, + 2636.2118356577457 + ] + }, + { + "segmentation": [ + [ + 1932.0, + 14.0, + 1961.0, + 66.0, + 1985.0, + 174.0, + 2035.0, + 273.0, + 2073.0, + 384.0, + 2104.0, + 479.0, + 2137.0, + 578.0, + 2157.0, + 637.0, + 2159.0, + 660.0, + 2201.0, + 752.0, + 2226.0, + 822.0, + 2251.0, + 897.0, + 2276.0, + 969.0, + 2301.0, + 973.0, + 2262.0, + 851.0, + 2167.0, + 593.0, + 2093.0, + 373.0, + 2067.0, + 308.0 + ] + ], + "id": 17, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 1932.2916819347745, + 13.621249655931738, + 2301.1564675705386, + 973.3570052298376 + ] + }, + { + "segmentation": [ + [ + 2277.0, + 979.0, + 2313.0, + 1074.0, + 2340.0, + 1183.0, + 2374.0, + 1282.0, + 2405.0, + 1356.0, + 2417.0, + 1393.0, + 2414.0, + 1438.0, + 2446.0, + 1516.0, + 2465.0, + 1562.0, + 2469.0, + 1568.0, + 2492.0, + 
1557.0, + 2414.0, + 1319.0, + 2326.0, + 1042.0, + 2301.0, + 974.0, + 2291.0, + 977.0 + ] + ], + "id": 18, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 2277.1652620007326, + 974.3567299752272, + 2492.0864785635763, + 1568.1932287365814 + ] + }, + { + "segmentation": [ + [ + 2627.0, + 9.0, + 2719.0, + 224.0, + 2752.0, + 285.0, + 2779.0, + 268.0, + 2751.0, + 212.0, + 2686.0, + 60.0, + 2658.0, + 6.0 + ] + ], + "id": 19, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 2627.037009893734, + 5.623451692815855, + 2778.9813118358375, + 284.54665565648224 + ] + }, + { + "segmentation": [ + [ + 2807.0, + 263.0, + 2779.0, + 252.0, + 2758.0, + 115.0, + 2739.0, + 35.0, + 2736.0, + 10.0, + 2763.0, + 10.0, + 2789.0, + 147.0 + ] + ], + "id": 20, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 2735.997068523269, + 9.622350674373795, + 2806.971051667277, + 262.5527112579136 + ] + }, + { + "segmentation": [ + [ + 2758.0, + 294.0, + 2833.0, + 460.0, + 2892.0, + 578.0, + 2936.0, + 663.0, + 2965.0, + 648.0, + 2877.0, + 463.0, + 2808.0, + 317.0, + 2802.0, + 260.0 + ] + ], + "id": 21, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 2757.9890069622575, + 259.5535370217451, + 2964.913155001832, + 663.4423341590972 + ] + }, + { + "segmentation": [ + [ + 2939.0, + 673.0, + 3004.0, + 856.0, + 3049.0, + 1017.0, + 3097.0, + 1170.0, + 3120.0, + 1277.0, + 3205.0, + 1280.0, + 3097.0, + 982.0, + 3004.0, + 733.0, + 2968.0, + 658.0, + 2947.0, + 652.0 + ] + ], + "id": 22, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 2938.922682301209, + 652.4453619598129, + 3204.82521069989, + 1280.2725020644095 + ] + }, + { + "segmentation": [ + [ + 3119.0, + 1286.0, + 3157.0, + 1420.0, + 3169.0, + 1519.0, + 3164.0, + 1605.0, + 3194.0, + 1639.0, + 3198.0, + 1699.0, + 3201.0, + 1775.0, + 3221.0, + 1830.0, + 3243.0, + 1886.0, + 3411.0, + 1890.0, + 3285.0, + 1497.0, + 3200.0, + 1279.0 + ] + ], + "id": 23, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 3118.856724074753, + 1279.27277731902, + 3410.749725174056, + 1890.1045967519956 + ] + }, + { + "segmentation": [ + [ + 3244.0, + 1893.0, + 3299.0, + 2024.0, + 3339.0, + 2098.0, + 3347.0, + 2166.0, + 3369.0, + 2253.0, + 3393.0, + 2283.0, + 3434.0, + 2304.0, + 3474.0, + 2409.0, + 3462.0, + 2234.0, + 3433.0, + 2036.0, + 3408.0, + 1894.0 + ] + ], + "id": 24, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 3243.8109197508243, + 1893.1037709881641, + 3473.726639794797, + 2408.9617396091385 + ] + }, + { + "segmentation": [ + [ + 3474.0, + 2424.0, + 3519.0, + 2573.0, + 3562.0, + 2699.0, + 3613.0, + 2772.0, + 3691.0, + 2964.0, + 3714.0, + 2924.0, + 3607.0, + 2664.0, + 3485.0, + 2432.0 + ] + ], + "id": 25, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 3473.726639794797, + 2423.957610789981, + 3713.638695492855, + 2963.808973300303 + ] + }, + { + "segmentation": [ + [ + 3692.0, + 2973.0, + 3755.0, + 3147.0, + 3804.0, + 3267.0, + 3792.0, + 3336.0, + 3795.0, + 3397.0, + 3816.0, + 3479.0, + 3845.0, + 3587.0, + 3864.0, + 3629.0, + 3974.0, + 3623.0, + 3938.0, + 3516.0, + 3845.0, + 3287.0, + 3732.0, + 3010.0, + 3713.0, + 2976.0 + ] + ], + "id": 26, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 3691.646757053866, + 2972.806496008808, + 3973.5434224990836, + 3628.6259289843106 + ] + }, + { + "segmentation": [ + [ + 3726.0, + 2851.0, + 3747.0, + 2973.0, + 3790.0, + 2973.0, + 3741.0, + 2850.0, + 3736.0, + 2852.0 + ] + ], + "id": 27, + "category_id": 
3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 3725.634298277758, + 2849.8403523259017, + 3789.610846463906, + 2972.806496008808 + ] + }, + { + "segmentation": [ + [ + 3526.0, + 1949.0, + 3514.0, + 1810.0, + 3482.0, + 1660.0, + 3466.0, + 1572.0, + 3417.0, + 1378.0, + 3372.0, + 1116.0, + 3308.0, + 907.0, + 3272.0, + 765.0, + 3237.0, + 776.0 + ] + ], + "id": 28, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 3236.8134847929646, + 765.4142581888246, + 3525.7075851960426, + 1949.0883567299754 + ] + }, + { + "segmentation": [ + [ + 3255.0, + 752.0, + 3156.0, + 293.0, + 3140.0, + 140.0, + 3133.0, + 104.0, + 3125.0, + 22.0, + 3151.0, + 9.0, + 3162.0, + 116.0, + 3181.0, + 258.0, + 3195.0, + 349.0, + 3216.0, + 491.0, + 3247.0, + 650.0 + ] + ], + "id": 29, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 3124.854525467204, + 8.622625928984311, + 3254.806888970319, + 752.4178364987613 + ] + }, + { + "segmentation": [ + [ + 4341.0, + 84.0, + 4285.0, + 121.0, + 4229.0, + 189.0, + 4156.0, + 272.0, + 4113.0, + 373.0, + 4075.0, + 436.0, + 4050.0, + 484.0, + 4002.0, + 565.0, + 3965.0, + 647.0, + 3979.0, + 673.0, + 4120.0, + 434.0, + 4257.0, + 199.0 + ] + ], + "id": 30, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 3964.5467204104066, + 83.60198183319571, + 4341.408574569439, + 673.439581612992 + ] + }, + { + "segmentation": [ + [ + 3700.0, + 385.0, + 3654.0, + 554.0, + 3637.0, + 653.0, + 3632.0, + 742.0, + 3660.0, + 765.0, + 3674.0, + 590.0, + 3703.0, + 420.0 + ] + ], + "id": 31, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 3631.6687431293512, + 384.51913019543076, + 3702.64272627336, + 765.4142581888246 + ] + }, + { + "segmentation": [ + [ + 3608.0, + 488.0, + 3581.0, + 707.0, + 3592.0, + 760.0, + 3614.0, + 755.0, + 3624.0, + 508.0, + 3630.0, + 447.0 + ] + ], + "id": 32, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 3580.687431293514, + 446.50206440957885, + 3629.6694759985344, + 760.4156344618773 + ] + }, + { + "segmentation": [ + [ + 3585.0, + 784.0, + 3604.0, + 1242.0, + 3618.0, + 1216.0, + 3612.0, + 763.0 + ] + ], + "id": 33, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 3584.6859655551484, + 763.4148086980457, + 3617.6738732136314, + 1242.2829617396092 + ] + }, + { + "segmentation": [ + [ + 3627.0, + 809.0, + 3618.0, + 1134.0, + 3651.0, + 1275.0, + 3625.0, + 1212.0, + 3649.0, + 1203.0, + 3649.0, + 909.0, + 3649.0, + 765.0 + ] + ], + "id": 34, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 3617.6738732136314, + 765.4142581888246, + 3650.661780872114, + 1275.273878337462 + ] + }, + { + "segmentation": [ + [ + 3604.0, + 1277.0, + 3570.0, + 1353.0, + 3584.0, + 1424.0, + 3624.0, + 1628.0, + 3670.0, + 1811.0, + 3691.0, + 1786.0, + 3611.0, + 1330.0 + ] + ], + "id": 35, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 3569.69146207402, + 1277.273327828241, + 3690.6471234884575, + 1811.126341866226 + ] + }, + { + "segmentation": [ + [ + 3709.0, + 1767.0, + 3725.0, + 1295.0, + 3722.0, + 1226.0, + 3695.0, + 1222.0, + 3678.0, + 1239.0, + 3699.0, + 1641.0, + 3714.0, + 1771.0 + ] + ], + "id": 36, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 3677.6518871381463, + 1222.2884668318193, + 3724.6346647123487, + 1771.1373520506468 + ] + }, + { + "segmentation": [ + [ + 3609.0, + 1264.0, + 3628.0, + 1338.0, + 3656.0, + 1455.0, + 3655.0, + 1511.0, + 3668.0, + 1609.0, + 3683.0, + 1483.0, + 3644.0, + 1292.0, + 3640.0, + 1269.0 + ] + ], + "id": 37, + 
"category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 3608.6771711249544, + 1264.276906138178, + 3682.6500549651887, + 1609.18194329755 + ] + }, + { + "segmentation": [ + [ + 4193.0, + 2969.0, + 4281.0, + 3379.0, + 4326.0, + 3606.0, + 4359.0, + 3613.0, + 4297.0, + 3329.0, + 4213.0, + 2967.0 + ] + ], + "id": 38, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 4193.462806888971, + 2966.808147536471, + 4359.401978746794, + 3612.6303330580786 + ] + }, + { + "segmentation": [ + [ + 4445.0, + 3619.0, + 4395.0, + 3622.0, + 4323.0, + 3338.0, + 4272.0, + 2979.0, + 4337.0, + 2975.0, + 4370.0, + 3288.0, + 4421.0, + 3521.0 + ] + ], + "id": 39, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 4272.433858556248, + 2974.8059454995873, + 4445.370465371931, + 3621.627855766584 + ] + }, + { + "segmentation": [ + [ + 4499.0, + 3210.0, + 4501.0, + 3615.0, + 4544.0, + 3619.0, + 4524.0, + 3209.0 + ] + ], + "id": 40, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 4499.350677903994, + 3208.7415359207266, + 4544.3341883473795, + 3618.6286815304156 + ] + }, + { + "segmentation": [ + [ + 4765.0, + 2971.0, + 4669.0, + 3223.0, + 4623.0, + 3340.0, + 4621.0, + 3440.0, + 4601.0, + 3562.0, + 4608.0, + 3609.0, + 4688.0, + 3318.0, + 4769.0, + 2982.0 + ] + ], + "id": 41, + "category_id": 3, + "image_id": 8, + "area": 0.0, + "bbox": [ + 4601.313301575668, + 2970.807046518029, + 4769.251740564309, + 3608.6314340765207 + ] + }, + { + "segmentation": [ + [ + 3534.0, + 3496.0, + 3539.0, + 3538.0, + 3558.0, + 3504.0 + ] + ], + "id": 42, + "category_id": 2, + "image_id": 9, + "area": 0.0, + "bbox": [ + 3533.7355081555834, + 3495.9540143234076, + 3558.379924717691, + 3538.393516773464 + ] + }, + { + "segmentation": [ + [ + 4206.0, + 3102.0, + 4213.0, + 3136.0, + 4235.0, + 3107.0 + ] + ], + "id": 43, + "category_id": 2, + "image_id": 9, + "area": 0.0, + "bbox": [ + 4205.980426599749, + 3101.6773464003018, + 4234.732245922209, + 3135.90275160196 + ] + }, + { + "segmentation": [ + [ + 2881.0, + 1835.0, + 2892.0, + 1887.0, + 2938.0, + 1898.0, + 2942.0, + 1867.0, + 2927.0, + 1826.0 + ] + ], + "id": 44, + "category_id": 1, + "image_id": 10, + "area": 0.0, + "bbox": [ + 2880.6584692597244, + 1825.7542404824728, + 2942.2695106649935, + 1898.3120995099887 + ] + } + ], + "categories": [ + { + "id": 1, + "name": "Class 1" + }, + { + "id": 2, + "name": "Class 2" + }, + { + "id": 3, + "name": "Class 3" + }, + { + "id": 4, + "name": "Class 4" + } + ] +} \ No newline at end of file diff --git a/contrib/segmentation/dev-requirements.in b/contrib/segmentation/dev-requirements.in new file mode 100644 index 000000000..a81368c95 --- /dev/null +++ b/contrib/segmentation/dev-requirements.in @@ -0,0 +1,9 @@ +-c requirements.txt +black +flake8 +isort +pip-tools +pre-commit +pytest +pytest-cov +pytest-mock \ No newline at end of file diff --git a/contrib/segmentation/dev-requirements.txt b/contrib/segmentation/dev-requirements.txt new file mode 100644 index 000000000..4d8d63a1d --- /dev/null +++ b/contrib/segmentation/dev-requirements.txt @@ -0,0 +1,100 @@ +# +# This file is autogenerated by pip-compile +# To update, run: +# +# pip-compile dev-requirements.in +# +appdirs==1.4.4 + # via + # black + # virtualenv +attrs==20.3.0 + # via + # -c requirements.txt + # pytest +black==21.5b1 + # via -r dev-requirements.in +cfgv==3.2.0 + # via pre-commit +click==7.1.2 + # via + # -c requirements.txt + # black + # pip-tools +coverage==5.5 + # via pytest-cov +distlib==0.3.1 + # via virtualenv 
+filelock==3.0.12 + # via virtualenv +flake8==3.9.1 + # via -r dev-requirements.in +identify==2.2.3 + # via pre-commit +iniconfig==1.1.1 + # via pytest +isort==5.8.0 + # via -r dev-requirements.in +mccabe==0.6.1 + # via flake8 +mypy-extensions==0.4.3 + # via black +nodeenv==1.6.0 + # via pre-commit +packaging==20.9 + # via + # -c requirements.txt + # pytest +pathspec==0.8.1 + # via + # -c requirements.txt + # black +pep517==0.10.0 + # via pip-tools +pip-tools==6.1.0 + # via -r dev-requirements.in +pluggy==0.13.1 + # via pytest +pre-commit==2.12.1 + # via -r dev-requirements.in +py==1.10.0 + # via pytest +pycodestyle==2.7.0 + # via flake8 +pyflakes==2.3.1 + # via flake8 +pyparsing==2.4.7 + # via + # -c requirements.txt + # packaging +pytest-cov==2.11.1 + # via -r dev-requirements.in +pytest-mock==3.6.1 + # via -r dev-requirements.in +pytest==6.2.4 + # via + # -r dev-requirements.in + # pytest-cov + # pytest-mock +pyyaml==5.4.1 + # via + # -c requirements.txt + # pre-commit +regex==2021.4.4 + # via black +six==1.16.0 + # via + # -c requirements.txt + # virtualenv +toml==0.10.2 + # via + # -c requirements.txt + # black + # pep517 + # pre-commit + # pytest +virtualenv==20.4.4 + # via pre-commit + +# The following packages are considered to be unsafe in a requirements file: +# pip diff --git a/contrib/segmentation/job.yml b/contrib/segmentation/job.yml new file mode 100644 index 000000000..0869f4cd3 --- /dev/null +++ b/contrib/segmentation/job.yml @@ -0,0 +1,23 @@ +$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json +code: + local_path: . +command: >- + python train.py + --train-dir {inputs.dummy} + --val-dir {inputs.dummy} + --patch-dim "256, 256" +inputs: + dummy: + data: + local_path: data/train + mode: mount +compute: + target: azureml:cpu-cluster + instance_count: 1 +# distribution: +# type: pytorch +# process_count: 2 +# azureml:: +environment: azureml:semantic-segmentation:1 +experiment_name: pytorch-semantic-segmentation +description: Train a Semantic Segmentation Model on the Semantic Segmentation Drone Dataset diff --git a/contrib/segmentation/mlops/compute.yml b/contrib/segmentation/mlops/compute.yml new file mode 100644 index 000000000..40cd77d48 --- /dev/null +++ b/contrib/segmentation/mlops/compute.yml @@ -0,0 +1,6 @@ +$schema: https://azuremlschemas.azureedge.net/latest/compute.schema.json +name: gpu-cluster +type: amlcompute +size: Standard_NC24s_v3 +min_instances: 0 +max_instances: 2 diff --git a/contrib/segmentation/mlops/conda_envs/conda_env.yml b/contrib/segmentation/mlops/conda_envs/conda_env.yml new file mode 100644 index 000000000..0252b7c99 --- /dev/null +++ b/contrib/segmentation/mlops/conda_envs/conda_env.yml @@ -0,0 +1,25 @@ +name: conda-env +channels: + - conda-forge +dependencies: + - python=3.8 + - pip + - pip: + - albumentations + - azure-storage-blob + - azureml-sdk + - click + - ipyplot + - jupyter + - jupytext + - matplotlib + - numpy>=1.20 + - pandas + - pycocotools + - python-dotenv + - scikit-learn + - seaborn + - segmentation-models-pytorch + - torch + - torchmetrics + - torchvision \ No newline at end of file diff --git a/contrib/segmentation/mlops/drone_dataset.yml b/contrib/segmentation/mlops/drone_dataset.yml new file mode 100644 index 000000000..aec6436de --- /dev/null +++ b/contrib/segmentation/mlops/drone_dataset.yml @@ -0,0 +1,5 @@ +$schema: https://azuremlschemas.azureedge.net/latest/dataset.schema.json +name: drone-dataset +version: 1 +datastore: azureml:workspaceblobstore +local_path: diff --git 
a/contrib/segmentation/mlops/environment.yml b/contrib/segmentation/mlops/environment.yml new file mode 100644 index 000000000..878a97ea8 --- /dev/null +++ b/contrib/segmentation/mlops/environment.yml @@ -0,0 +1,6 @@ +$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json +name: semantic-segmentation +version: 1 +docker: + image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04 +conda_file: file:conda_envs/conda_env.yml \ No newline at end of file diff --git a/contrib/segmentation/requirements.in b/contrib/segmentation/requirements.in new file mode 100644 index 000000000..c47ef3767 --- /dev/null +++ b/contrib/segmentation/requirements.in @@ -0,0 +1,21 @@ +albumentations +azure-storage-blob +azureml-sdk +click +ipyplot +jupyter +jupytext +matplotlib +# Change to numpy C-api in versions >=1.20.0 are currently incompatible with some libraries +# pycocotools (any version) is currently one known issue +# https://numpy.org/doc/stable/release/1.20.0-notes.html#size-of-np-ndarray-and-np-void-changed +numpy>=1.20 +pandas +pycocotools +python-dotenv +scikit-learn +seaborn +segmentation-models-pytorch +torch +torchmetrics +torchvision \ No newline at end of file diff --git a/contrib/segmentation/requirements.txt b/contrib/segmentation/requirements.txt new file mode 100644 index 000000000..2458656c6 --- /dev/null +++ b/contrib/segmentation/requirements.txt @@ -0,0 +1,533 @@ +# +# This file is autogenerated by pip-compile +# To update, run: +# +# pip-compile requirements.in +# +adal==1.2.7 + # via + # azureml-core + # msrestazure +albumentations==1.0.0 + # via -r requirements.in +applicationinsights==0.11.10 + # via azureml-telemetry +argon2-cffi==20.1.0 + # via notebook +async-generator==1.10 + # via nbclient +attrs==20.3.0 + # via + # jsonschema + # markdown-it-py +azure-common==1.1.27 + # via + # azure-graphrbac + # azure-mgmt-authorization + # azure-mgmt-containerregistry + # azure-mgmt-keyvault + # azure-mgmt-resource + # azure-mgmt-storage + # azureml-core +azure-core==1.13.0 + # via + # azure-identity + # azure-storage-blob +azure-graphrbac==0.61.1 + # via azureml-core +azure-identity==1.4.1 + # via azureml-dataprep +azure-mgmt-authorization==0.61.0 + # via azureml-core +azure-mgmt-containerregistry==2.8.0 + # via azureml-core +azure-mgmt-keyvault==2.2.0 + # via azureml-core +azure-mgmt-resource==12.1.0 + # via azureml-core +azure-mgmt-storage==11.2.0 + # via azureml-core +azure-storage-blob==12.8.1 + # via -r requirements.in +azureml-automl-core==1.27.0 + # via azureml-train-automl-client +azureml-core==1.27.0 + # via + # azureml-pipeline-core + # azureml-sdk + # azureml-telemetry + # azureml-train-automl-client + # azureml-train-core +azureml-dataprep-native==33.0.0 + # via azureml-dataprep +azureml-dataprep-rslex==1.12.1 + # via azureml-dataprep +azureml-dataprep==2.14.2 + # via azureml-dataset-runtime +azureml-dataset-runtime[fuse]==1.27.0 + # via + # azureml-automl-core + # azureml-sdk + # azureml-train-automl-client +azureml-pipeline-core==1.27.0 + # via + # azureml-pipeline + # azureml-pipeline-steps +azureml-pipeline-steps==1.27.0 + # via azureml-pipeline +azureml-pipeline==1.27.0 + # via azureml-sdk +azureml-sdk==1.27.0 + # via -r requirements.in +azureml-telemetry==1.27.0 + # via + # azureml-automl-core + # azureml-train-automl-client + # azureml-train-core +azureml-train-automl-client==1.27.0 + # via + # azureml-pipeline-steps + # azureml-sdk +azureml-train-core==1.27.0 + # via + # azureml-pipeline-steps + # azureml-train 
+azureml-train-restclients-hyperdrive==1.27.0 + # via azureml-train-core +azureml-train==1.27.0 + # via azureml-sdk +backcall==0.2.0 + # via ipython +backports.tempfile==1.0 + # via azureml-core +backports.weakref==1.0.post1 + # via backports.tempfile +bleach==3.3.0 + # via nbconvert +certifi==2020.12.5 + # via + # msrest + # requests +cffi==1.14.5 + # via + # argon2-cffi + # cryptography +chardet==4.0.0 + # via requests +click==7.1.2 + # via -r requirements.in +cloudpickle==1.6.0 + # via azureml-dataprep +contextlib2==0.6.0.post1 + # via azureml-core +cryptography==3.4.7 + # via + # adal + # azure-identity + # azure-storage-blob + # azureml-core + # msal + # pyjwt + # pyopenssl + # secretstorage +cycler==0.10.0 + # via matplotlib +cython==0.29.23 + # via pycocotools +decorator==4.4.2 + # via + # ipython + # networkx +defusedxml==0.7.1 + # via nbconvert +distro==1.5.0 + # via dotnetcore2 +docker==4.4.4 + # via azureml-core +dotnetcore2==2.1.20 + # via azureml-dataprep +efficientnet-pytorch==0.6.3 + # via segmentation-models-pytorch +entrypoints==0.3 + # via nbconvert +fusepy==3.0.1 + # via azureml-dataset-runtime +idna==2.10 + # via requests +imageio==2.9.0 + # via scikit-image +ipykernel==5.5.4 + # via + # ipywidgets + # jupyter + # jupyter-console + # notebook + # qtconsole +ipyplot==1.1.0 + # via -r requirements.in +ipython-genutils==0.2.0 + # via + # nbformat + # notebook + # qtconsole + # traitlets +ipython==7.23.1 + # via + # ipykernel + # ipyplot + # ipywidgets + # jupyter-console +ipywidgets==7.6.3 + # via jupyter +isodate==0.6.0 + # via msrest +jedi==0.18.0 + # via ipython +jeepney==0.6.0 + # via secretstorage +jinja2==2.11.3 + # via + # nbconvert + # notebook +jmespath==0.10.0 + # via azureml-core +joblib==1.0.1 + # via scikit-learn +jsonpickle==2.0.0 + # via azureml-core +jsonschema==3.2.0 + # via nbformat +jupyter-client==6.1.12 + # via + # ipykernel + # jupyter-console + # nbclient + # notebook + # qtconsole +jupyter-console==6.4.0 + # via jupyter +jupyter-core==4.7.1 + # via + # jupyter-client + # nbconvert + # nbformat + # notebook + # qtconsole +jupyter==1.0.0 + # via -r requirements.in +jupyterlab-pygments==0.1.2 + # via nbconvert +jupyterlab-widgets==1.0.0 + # via ipywidgets +jupytext==1.11.2 + # via -r requirements.in +kiwisolver==1.3.1 + # via matplotlib +markdown-it-py[plugins]==1.0.0 + # via + # jupytext + # mdit-py-plugins +markupsafe==1.1.1 + # via jinja2 +matplotlib-inline==0.1.2 + # via ipython +matplotlib==3.4.1 + # via + # -r requirements.in + # pycocotools + # scikit-image + # seaborn +mdit-py-plugins==0.2.8 + # via markdown-it-py +mistune==0.8.4 + # via nbconvert +msal-extensions==0.2.2 + # via azure-identity +msal==1.11.0 + # via + # azure-identity + # msal-extensions +msrest==0.6.21 + # via + # azure-graphrbac + # azure-mgmt-authorization + # azure-mgmt-containerregistry + # azure-mgmt-keyvault + # azure-mgmt-resource + # azure-mgmt-storage + # azure-storage-blob + # azureml-core + # azureml-train-restclients-hyperdrive + # msrestazure +msrestazure==0.6.4 + # via + # azure-graphrbac + # azure-mgmt-authorization + # azure-mgmt-containerregistry + # azure-mgmt-keyvault + # azure-mgmt-resource + # azure-mgmt-storage + # azureml-core + # azureml-train-restclients-hyperdrive +munch==2.5.0 + # via pretrainedmodels +nbclient==0.5.3 + # via nbconvert +nbconvert==6.0.7 + # via + # jupyter + # notebook +nbformat==5.1.3 + # via + # ipywidgets + # jupytext + # nbclient + # nbconvert + # notebook +ndg-httpsclient==0.5.1 + # via azureml-core +nest-asyncio==1.5.1 + # via 
nbclient +networkx==2.5.1 + # via scikit-image +notebook==6.3.0 + # via + # jupyter + # widgetsnbextension +numpy==1.20.2 + # via + # -r requirements.in + # albumentations + # azureml-dataset-runtime + # imageio + # ipyplot + # matplotlib + # opencv-python-headless + # pandas + # pyarrow + # pywavelets + # scikit-image + # scikit-learn + # scipy + # seaborn + # tifffile + # torch + # torchvision +oauthlib==3.1.0 + # via requests-oauthlib +opencv-python-headless==4.5.2.52 + # via albumentations +packaging==20.9 + # via + # bleach + # torchmetrics +pandas==1.2.4 + # via + # -r requirements.in + # seaborn +pandocfilters==1.4.3 + # via nbconvert +parso==0.8.2 + # via jedi +pathspec==0.8.1 + # via azureml-core +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython +pillow==8.2.0 + # via + # imageio + # ipyplot + # matplotlib + # scikit-image + # torchvision +portalocker==1.7.1 + # via msal-extensions +pretrainedmodels==0.7.4 + # via segmentation-models-pytorch +prometheus-client==0.10.1 + # via notebook +prompt-toolkit==3.0.18 + # via + # ipython + # jupyter-console +ptyprocess==0.7.0 + # via + # pexpect + # terminado +pyarrow==1.0.1 + # via azureml-dataset-runtime +pyasn1==0.4.8 + # via ndg-httpsclient +pycocotools==2.0.2 + # via -r requirements.in +pycparser==2.20 + # via cffi +pygments==2.9.0 + # via + # ipython + # jupyter-console + # jupyterlab-pygments + # nbconvert + # qtconsole +pyjwt[crypto]==2.1.0 + # via + # adal + # azureml-core + # msal +pyopenssl==20.0.1 + # via + # azureml-core + # ndg-httpsclient +pyparsing==2.4.7 + # via + # matplotlib + # packaging +pyrsistent==0.17.3 + # via jsonschema +python-dateutil==2.8.1 + # via + # adal + # azureml-core + # jupyter-client + # matplotlib + # pandas +python-dotenv==0.17.1 + # via -r requirements.in +pytz==2021.1 + # via + # azureml-core + # pandas +pywavelets==1.1.1 + # via scikit-image +pyyaml==5.4.1 + # via + # albumentations + # jupytext +pyzmq==22.0.3 + # via + # jupyter-client + # notebook + # qtconsole +qtconsole==5.1.0 + # via jupyter +qtpy==1.9.0 + # via qtconsole +requests-oauthlib==1.3.0 + # via msrest +requests==2.25.1 + # via + # adal + # azure-core + # azureml-core + # azureml-train-restclients-hyperdrive + # docker + # msal + # msrest + # requests-oauthlib +ruamel.yaml.clib==0.2.2 + # via ruamel.yaml +ruamel.yaml==0.17.4 + # via azureml-core +scikit-image==0.18.1 + # via albumentations +scikit-learn==0.24.2 + # via -r requirements.in +scipy==1.6.3 + # via + # albumentations + # scikit-image + # scikit-learn + # seaborn +seaborn==0.11.1 + # via -r requirements.in +secretstorage==3.3.1 + # via azureml-core +segmentation-models-pytorch==0.1.3 + # via -r requirements.in +send2trash==1.5.0 + # via notebook +shortuuid==1.0.1 + # via ipyplot +six==1.16.0 + # via + # argon2-cffi + # azure-core + # azure-identity + # bleach + # cycler + # docker + # isodate + # jsonschema + # msrestazure + # munch + # pyopenssl + # python-dateutil + # websocket-client +terminado==0.9.4 + # via notebook +testpath==0.4.4 + # via nbconvert +threadpoolctl==2.1.0 + # via scikit-learn +tifffile==2021.4.8 + # via scikit-image +timm==0.3.2 + # via segmentation-models-pytorch +toml==0.10.2 + # via jupytext +torch==1.8.1 + # via + # -r requirements.in + # efficientnet-pytorch + # pretrainedmodels + # timm + # torchmetrics + # torchvision +torchmetrics==0.3.2 + # via -r requirements.in +torchvision==0.9.1 + # via + # -r requirements.in + # pretrainedmodels + # segmentation-models-pytorch + # timm +tornado==6.1 + # via + # ipykernel + # 
jupyter-client + # notebook + # terminado +tqdm==4.61.0 + # via pretrainedmodels +traitlets==5.0.5 + # via + # ipykernel + # ipython + # ipywidgets + # jupyter-client + # jupyter-core + # matplotlib-inline + # nbclient + # nbconvert + # nbformat + # notebook + # qtconsole +typing-extensions==3.10.0.0 + # via torch +urllib3==1.26.4 + # via + # azureml-core + # requests +wcwidth==0.2.5 + # via prompt-toolkit +webencodings==0.5.1 + # via bleach +websocket-client==0.59.0 + # via docker +widgetsnbextension==3.5.1 + # via ipywidgets + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/contrib/segmentation/src/azure/__init__.py b/contrib/segmentation/src/azure/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/segmentation/src/azure/data_labeling.py b/contrib/segmentation/src/azure/data_labeling.py new file mode 100644 index 000000000..6afbfc7a0 --- /dev/null +++ b/contrib/segmentation/src/azure/data_labeling.py @@ -0,0 +1,51 @@ +import copy +from typing import Dict, List + +from ..preprocessing.bbox import convert_bbox +from ..preprocessing.segmentation import convert_segmentation + + +def aml_coco_labels_to_standard_coco(labels_json: Dict): + """Serialize an AML COCO labels dictionary to a standard COCO labels dictionary + + Parameters + ---------- + labels_json : dict + Labels in AML COCO format + + Returns + ------- + labels_json : dict + Labels in standard COCO format + """ + labels_json = copy.deepcopy(labels_json) + + for annotation_json in labels_json["annotations"]: + # Index is image_id - 1 because the ids are 1-index based + image_json = labels_json["images"][annotation_json["image_id"] - 1] + + # Convert segmentation + + # Segmentation is nested in another array + segmentation: List[float] = annotation_json["segmentation"][0] + segmentation = convert_segmentation( + segmentation, + source_format="aml_coco", + target_format="coco", + image_width=image_json["width"], + image_height=image_json["height"], + ) + annotation_json["segmentation"] = [segmentation] + + # Convert bounding box + bbox: List[float] = annotation_json["bbox"] + bbox = convert_bbox( + bbox, + source_format="aml_coco", + target_format="coco", + image_width=image_json["width"], + image_height=image_json["height"], + ) + annotation_json["bbox"] = bbox + + return labels_json diff --git a/contrib/segmentation/src/datasets/__init__.py b/contrib/segmentation/src/datasets/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/segmentation/src/datasets/coco.py b/contrib/segmentation/src/datasets/coco.py new file mode 100644 index 000000000..88c6c7135 --- /dev/null +++ b/contrib/segmentation/src/datasets/coco.py @@ -0,0 +1,75 @@ +import json +from os.path import join +from typing import Dict, List, Tuple, Union + +import numpy as np +from PIL import Image + +from src.datasets.coco_utils import ( + annotation_to_mask_array, + coco_annotations_by_image_id, + filter_coco_json_by_category_ids, +) + +BoundingBox = Union[Tuple[int, int, int, int], List[int]] + + +class CocoDataset: + def __init__( + self, + labels_filepath: str, + root_dir: str, + classes: List[int], + annotation_format: str, + ): + coco_json: Dict = json.load(open(labels_filepath, "r")) + coco_json = filter_coco_json_by_category_ids(coco_json, classes) + + self.root_dir = root_dir + self.images: List[Dict] = coco_json["images"] + self.annotations: List[Dict] = coco_json["annotations"] + self.categories: List[Dict] = coco_json["categories"] + self.classes = 
classes + self.annotation_format = annotation_format + self.annotations_by_image_id = coco_annotations_by_image_id(coco_json) + self.images_by_image_id: Dict[int, Dict] = { + image_json["id"]: image_json for image_json in self.images + } + + def get_semantic_segmentation_info_for_image( + self, image_id: int + ) -> Tuple[np.ndarray, List[BoundingBox], np.ndarray, List[int]]: + """Get the objects needed to perform semantic segmentation + + Parameters + ---------- + image_id : int + ID of image in dataset + + Returns + ------- + image : np.ndarray + bboxes : List[BoundingBox] + mask : np.ndarray + class_labels : List[int] + """ + image_json = self.images_by_image_id[image_id] + image_filepath = join(self.root_dir, image_json["file_name"]) + image = Image.open(image_filepath).convert("RGB") + image = np.array(image).astype("uint8") + height, width, _ = image.shape + + image_id: int = image_json["id"] + annotations = self.annotations_by_image_id[image_id] + bboxes: List[BoundingBox] = [ann["bbox"] for ann in annotations] + class_labels: List[int] = [ann["category_id"] for ann in annotations] + + mask = annotation_to_mask_array( + width=width, + height=height, + annotations=annotations, + classes=self.classes, + annotation_format=self.annotation_format, + ) + + return image, bboxes, mask, class_labels diff --git a/contrib/segmentation/src/datasets/coco_utils.py b/contrib/segmentation/src/datasets/coco_utils.py new file mode 100644 index 000000000..631f9c748 --- /dev/null +++ b/contrib/segmentation/src/datasets/coco_utils.py @@ -0,0 +1,402 @@ +from copy import deepcopy +from typing import Dict, List, Tuple + +import cv2 +import numpy as np +from PIL import Image + +from src.preprocessing.segmentation import convert_segmentation + + +def rescale_annotation( + width: int, height: int, annotation: Dict, patch_rect: Tuple[int, int, int, int] +) -> Dict: + """rescale the source image annotation wrt the new extracted patch + + Parameters + ---------- + width : int + Original source image width + height : int + Original source image height + annotation : Dict + A single image annotation + patch_rect : Tuple(int, int, int, int) + Width and Height of the extracted patch + + + Returns + ------- + new_annotation : Dict + The rescaled annotation + """ + new_annotation = annotation.copy() + + new_annotation["bbox"] = [ + ((annotation["bbox"][0] * width) - patch_rect[0]) / patch_rect[2], + ((annotation["bbox"][1] * height) - patch_rect[1]) / patch_rect[3], + (annotation["bbox"][2] * width) / patch_rect[2], + (annotation["bbox"][3] * height) / patch_rect[3], + ] + + if "segmentation" in annotation: + segmentation = annotation["segmentation"][0] + seg_x = [ + ((x * width) - patch_rect[0]) / patch_rect[2] for x in segmentation[::2] + ] + seg_y = [ + ((y * height) - patch_rect[1]) / patch_rect[3] for y in segmentation[1::2] + ] + + new_segmentation = [None] * (len(seg_x) + len(seg_y)) + new_segmentation[::2] = seg_x + new_segmentation[1::2] = seg_y + + new_annotation["segmentation"] = [new_segmentation] + + return new_annotation + + +def extract_windowed_patches( + image: object, + annotations: List[Dict], + patch_dimension: Tuple[int, int] = (1000, 1000), + window_overlap: float = 0.1, +) -> List[Tuple[object, List[Dict]]]: + """For an input image with a normalized list of annotations return + a list of all extracted patched images and rescaled annotations + + Parameters + ---------- + image : object + Original source image + annotations : List[Dict] + List of all original source image annotations + 
patch_dimension : Tuple(Int, Int) + Width and Height of the extracted patch + window_overlap : Float + increment window by % of patch_dimension + + + Returns + ------- + patch_list : List[Tuple[object, List[Dict]]] + List of all extracted patches with rescaled/centered + image and annotations + """ + patches = [] + width = image.width + height = image.height + + # move window of patch_dimension on the original image + for x in range(0, width, int(patch_dimension[0] * window_overlap)): + for y in range(0, height, int(patch_dimension[1] * window_overlap)): + # get patch dimension + x = min(x, width - patch_dimension[0]) + y = min(y, height - patch_dimension[1]) + + # check if patch contains at least one annotation + patch_annotations = [] + for annotation in annotations: + bbox = annotation["bbox"].copy() + bbox[0] = bbox[0] * width + bbox[1] = bbox[1] * height + bbox[2] = bbox[2] * width + bbox[3] = bbox[3] * height + + if ( + bbox[0] >= x + and bbox[0] + bbox[2] < x + patch_dimension[0] + and bbox[1] >= y + and bbox[1] + bbox[3] < y + patch_dimension[1] + ): + + # rescale bbox and segments + rescaled_annotation = rescale_annotation( + width, + height, + annotation, + (x, y, patch_dimension[0], patch_dimension[1]), + ) + + patch_annotations.append(rescaled_annotation) + + if len(patch_annotations) > 0: + # crop the image for the patch before zoom + patch = image.crop( + (x, y, x + patch_dimension[0], y + patch_dimension[1]) + ) + + # rescale bbox and segments + + patches.append((patch, patch_annotations)) + + return patches + + +def annotation_to_mask_array( + width: int, + height: int, + annotations: List[Dict], + classes: List[int], + annotation_format="aml_coco", +) -> np.ndarray: + """Convert annotations to a mask numpy array + + Parameters + ---------- + width : int + Original source image width + height : int + Original source image height + annotations : List[Dict] + List of all original source image annotations + classes : List[int] + list of classes to use for the patch image + annotation_format : {'coco', 'aml_coco'} + Format that the annotations are in + + Returns + ------- + mask_array : numpy.ndarray + The mask array with shape `(height, width)` + """ + mask_array = np.zeros((height, width), dtype=np.uint8) + + for annotation in annotations: + if int(annotation["category_id"]) in classes: + segmentation = annotation["segmentation"][0] + segmentation = convert_segmentation( + segmentation, + source_format=annotation_format, + target_format="coco", + image_height=height, + image_width=width, + ) + + segmentation_array = np.array( + [list(x) for x in zip(segmentation[::2], segmentation[1::2])], + dtype=np.int32, + ) + cv2.fillPoly( + mask_array, [segmentation_array], color=(annotation["category_id"]) + ) + + return mask_array + + +def annotation_to_mask_image( + width: int, height: int, annotations: List[Dict], classes: List[int] +) -> object: + """Convert annotations to a mask image + + Parameters + ---------- + width : int + Original source image width + height : int + Original source image height + annotations : List[Dict] + List of all original source image annotations + classes : List[int] + list of classes to use for the patch image + + Returns + ------- + mask_array : object + The mask image + """ + mask_array = annotation_to_mask_array(width, height, annotations, classes) + return Image.fromarray(mask_array) + + +def extract_windowed_patches_and_mask_images( + image: object, + annotations: List[Dict], + classes: List[int], + patch_dimension: Tuple[int, int] = (1000, 1000), 
+ window_overlap: float = 0.1, +) -> List[Tuple[object, object]]: + """For an input image with a normalized list of annotations return + a list of all extracted patched images and corresponding mask images + + Parameters + ---------- + image : object + Original source image + annotations : List[Dict] + List of all original source image annotations + classes : List[int] + list of classes to use for the patch image + patch_dimension : Tuple(Int, Int) + Width and Height of the extracted patch + window_overlap : Float + increment window by % of patch_dimension + + Returns + ------- + patch_list : List[Tuple[object, object]] + List of all extracted patches and corresponding mask images + """ + + def convert(width, height, patch): + return (patch[0], annotation_to_mask_image(width, height, patch[1], classes)) + + patch_list = extract_windowed_patches( + image, annotations, patch_dimension, window_overlap + ) + return [convert(patch_dimension[0], patch_dimension[1], p) for p in patch_list] + + +def filter_coco_annotations_by_category_ids( + coco_json: Dict, category_ids: List[int] +) -> Dict: + """Filter COCO annotations to only contain the given category_ids + + Parameters + ---------- + coco_json : Dict + COCO JSON read in as a Dictionary + category_ids : List[int] + Annotations containing a category_id in category_ids will be retained + + Returns + ------- + coco_json : Dict + COCO JSON with only the annotations for the given category_ids + """ + coco_json = deepcopy(coco_json) + annotations = list( + filter( + lambda ann: ann["category_id"] in category_ids, + coco_json["annotations"], + ) + ) + coco_json["annotations"] = annotations + return coco_json + + +def filter_coco_json_by_category_ids(coco_json: Dict, category_ids: List[int]): + """Filter images and annotations in COCO JSON by category_ids + + Parameters + ---------- + coco_json : Dict + COCO JSON read in as a Dictionary + category_ids : List[int] + List of category ids that the COCO JSON will retain images and annotations for + + Returns + ------- + """ + coco_json = deepcopy(coco_json) + coco_json["annotations"] = list( + filter( + lambda ann: ann["category_id"] in category_ids, + coco_json["annotations"], + ) + ) + annotations_by_image_id = coco_annotations_by_image_id(coco_json) + coco_json["images"] = list( + filter( + lambda image: image["id"] in annotations_by_image_id, + coco_json["images"], + ) + ) + coco_json["categories"] = list( + filter(lambda category: category["id"] in category_ids, coco_json["categories"]) + ) + + return coco_json + + +def coco_annotations_by_image_id(coco_json: Dict) -> Dict[int, List]: + """Restructure the "annotations" section of the COCO data format + to be indexable by the image_id + + Parameters + ---------- + coco_json : Dict + COCO JSON read in as a Dictionary + + Returns + ------- + annotations_by_image_id : Dict[int, List] + Dictionary with key as the image_id to the list of annotations within the image + """ + annotations_by_image_id: Dict[int, List] = {} + for annotation in coco_json["annotations"]: + image_id = annotation["image_id"] + if image_id not in annotations_by_image_id: + annotations_by_image_id[image_id] = [annotation] + else: + annotations_by_image_id[image_id].append(annotation) + return annotations_by_image_id + + +def extract_windowed_patches_and_mask_images_sub_annotation( + image: object, + annotations: List[Dict], + classes: List[int], + patch_dimension: Tuple[int, int] = (1000, 1000), + window_overlap: float = 0.1, + threshold: int = 100, +) -> List[Tuple[object, 
object]]: + """For an input image with a normalized list of annotations return + a list of all extracted patched images and corresponding mask images + + Parameters + ---------- + image : object + Original source image + annotations : List[Dict] + List of all original source image annotations + classes : List[int] + list of classes to use for the patch image + patch_dimension : Tuple(Int, Int) + Width and Height of the extracted patch + window_overlap : Float + increment window by % of patch_dimension + threshold : Int + minimum number of pixels in patch mask + + Returns + ------- + patch_list : List[Tuple[object, object]] + List of all extracted patches and corresponding mask images + """ + patches = [] + width = image.width + height = image.height + + mask_array = annotation_to_mask_array(width, height, annotations, classes) + mask_image = Image.fromarray(mask_array) + # Get monochromatic mask array in order to count number of pixels different than background. + # This array must also be transposed due to differences in the x,y coordinates between + # Pillow and Numpy matrix + mask_mono_array = np.where(mask_array > 0, 1, 0).astype("uint8").transpose() + + processed = set() + + # move window of patch_dimension on the original image + for x in range(0, width, int(patch_dimension[0] * window_overlap)): + for y in range(0, height, int(patch_dimension[1] * window_overlap)): + # get patch dimension + x = min(x, width - patch_dimension[0]) + y = min(y, height - patch_dimension[1]) + + if (x, y) not in processed: + processed.add((x, y)) + if ( + mask_mono_array[ + x : x + patch_dimension[0], y : y + patch_dimension[1] + ].sum() + >= threshold + ): + patch_pos = (x, y, x + patch_dimension[0], y + patch_dimension[1]) + patch_image = image.crop(patch_pos) + patch_mask_image = mask_image.crop(patch_pos) + + patches.append((patch_image, patch_mask_image)) + + return patches diff --git a/contrib/segmentation/src/datasets/drone_dataset.py b/contrib/segmentation/src/datasets/drone_dataset.py new file mode 100644 index 000000000..4ce2c4d35 --- /dev/null +++ b/contrib/segmentation/src/datasets/drone_dataset.py @@ -0,0 +1,65 @@ +from glob import glob +from os.path import join +import numpy as np +import pandas as pd +from PIL import Image +from typing import List, Tuple +from src.datasets.coco import BoundingBox + +from torch.utils.data import Dataset + + +class DroneDataset(Dataset): + def __init__(self, images_dir: str, mask_dir: str, class_dict_path: str): + self.images_dir = images_dir + self.mask_dir = mask_dir + self.images_index = [ + filename.split(".")[0] for filename in glob("*.jpg") + ] + + class_dict = pd.read_csv(class_dict_path).to_dict("index") + self.class_id_to_name = { + class_id: rec["name"] for class_id, rec in class_dict.items() + } + self.rgb_to_class = { + (rec["r"], rec["g"], rec["b"]): int(class_id) + for class_id, rec in class_dict.items() + } + + def _mask_rgb_to_class_label(self, rgb_mask: np.ndarray): + """The Semantic Drone Dataset formats their masks as an RGB mask + To prepare the mask for use with a PyTorch model, we must encode + the mask as a 2D array of class labels + + Parameters + ---------- + rgb_mask : np.ndarray + Mask array with RGB values for each class + + Returns + ------- + mask : np.ndarray + Mask with shape `(height, width)` with class_id values where they occur + """ + height, width, _ = rgb_mask.shape + mask = np.zeros((height, width)) + for i in range(height): + for j in range(width): + mask[i][j] = self.rgb_to_class[tuple(rgb_mask[i][j])] + return mask + 
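The per-pixel loop in `_mask_rgb_to_class_label` is easy to follow but slow for large masks. Below is a minimal vectorized sketch of the same RGB-to-class-id mapping; the helper name and the two-color mapping in the example are made up for illustration, while the dataset's real mapping comes from `class_dict_path`.

```python
import numpy as np


def rgb_mask_to_class_label(rgb_mask: np.ndarray, rgb_to_class: dict) -> np.ndarray:
    # Vectorized variant: one boolean comparison per class instead of one dict lookup per pixel
    height, width, _ = rgb_mask.shape
    mask = np.zeros((height, width), dtype=np.int64)
    for (r, g, b), class_id in rgb_to_class.items():
        # Pixels whose RGB triple matches this class color get its class id
        matches = np.all(rgb_mask == np.array([r, g, b], dtype=rgb_mask.dtype), axis=-1)
        mask[matches] = class_id
    return mask


# Tiny worked example: a 1x2 "image" with background and one class (colors are hypothetical)
rgb_to_class = {(0, 0, 0): 0, (128, 64, 128): 1}
rgb_mask = np.array([[[0, 0, 0], [128, 64, 128]]], dtype=np.uint8)
print(rgb_mask_to_class_label(rgb_mask, rgb_to_class))  # [[0 1]]
```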
+ def __getitem__( + self, image_id: int + ) -> Tuple[np.ndarray, List[BoundingBox]]: + filename = self.images_index[image_id] + image_filepath = join(self.images_dir, f"{filename}.jpg") + image = Image.open(image_filepath).convert("RGB") + image = np.array(image).astype("float32") + + mask_filepath = join(self.images_dir, f"{filename}.png") + mask = Image.open(mask_filepath).convert("RGB") + mask = np.array(mask).astype("uint8") + + mask = self._mask_rgb_to_class_label(mask) + + return image, [], mask, [] diff --git a/contrib/segmentation/src/datasets/semantic_segmentation.py b/contrib/segmentation/src/datasets/semantic_segmentation.py new file mode 100644 index 000000000..0348f85b6 --- /dev/null +++ b/contrib/segmentation/src/datasets/semantic_segmentation.py @@ -0,0 +1,576 @@ +import glob +import logging +import math +from os.path import join +from pathlib import Path +from typing import Callable, Dict, List, Optional, Tuple, Union + +import albumentations as A +import numpy as np +import torch +from PIL import Image +from torch.utils.data.dataset import Dataset + +from .coco import CocoDataset + +BoundingBox = Union[Tuple[int, int, int, int], List[int]] +SemanticSegmentationItem = Tuple[ + np.ndarray, List[BoundingBox], np.ndarray, List[int] +] + + +def construct_center_crop_on_bbox_transform( + bbox_to_center_on: BoundingBox, + image_dim: Tuple[int, int], + patch_dim: Tuple[int, int] = (256, 256), +): + """Center crop around a given bounding box + + Parameters + ---------- + bbox_to_center_on : BoundingBox + Bounding box to center the crop on + image_dim : Tuple[int, int] + Dimensions of source image `(height, width)` to take crop from + patch_dim : Tuple[int, int] + Patch dimension `(height, width)` to take crop on + + Returns + ------- + transform : albumentations.Compose + Transformation object from albumentations + """ + height, width = image_dim + x1, y1, x2, y2 = tuple(bbox_to_center_on) + x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) + + # Create coordinates for center based cropping on the annotation + x1 = max(x1 - ((patch_dim[0] - (x2 - x1)) // 2), 0) + x2 = min(x1 + patch_dim[0], width) + if x2 == width: + x1 = x2 - patch_dim[0] + + y1 = max(y1 - ((patch_dim[1] - (y2 - y1)) // 2), 0) + y2 = min(y1 + patch_dim[1], height) + if y2 == height: + y1 = y2 - patch_dim[1] + + transform = A.Compose( + [ + A.Crop(x1, y1, x2, y2), + ], + bbox_params=A.BboxParams( + format="pascal_voc", label_fields=["class_labels"] + ), + ) + + return transform + + +def resize_until_center_crop_on_bbox( + bbox_to_center_on: BoundingBox, + image: np.ndarray, + bboxes: List[BoundingBox], + mask: np.ndarray, + class_labels: List[int], + resize_dim=(512, 512), + preserve_aspect_ratio=True, + patch_dim=(256, 256), +): + """ + + Parameters + ---------- + bbox_to_center_on : BoundingBox + Bounding box to center the crop on + image : np.ndarray + Image with shape `(height, width, channels)` + bboxes : List[BoundingBox] + List of bounding boxes contained in image + mask : np.ndarray + Segmentation mask with shape (height, width) and integer entries corresponding to the `class_labels` + class_labels : List[int] + List of class labels corresponding to the bounding boxes + resize_dim : Tuple[int, int] + `(height, width)` to resize image to before taking the crop + preserve_aspect_ratio : bool + True if resizing should not change the aspect ratio + + + patch_dim : Tuple[int, int] + Patch dimension `(height, width)` to take crop on + """ + image = np.array(image) + height, width, _ = image.shape + x1, y1, x2, 
y2 = tuple(bbox_to_center_on) + x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) + + if preserve_aspect_ratio: + dim = max(resize_dim[0], resize_dim[1]) + height = height / dim + width = width / dim + else: + height = resize_dim[0] + width = resize_dim[1] + + transform = A.Compose( + [ + A.Resize(height, width), + ], + bbox_params=A.BboxParams( + format="pascal_voc", label_fields=["class_labels"] + ), + ) + augmented = transform( + image=image, bboxes=bboxes, mask=mask, class_labels=class_labels + ) + image, bboxes, mask, class_labels = ( + augmented["image"], + augmented["bboxes"], + augmented["mask"], + augmented["class_labels"], + ) + + transform = construct_center_crop_on_bbox_transform( + bbox_to_center_on, + image_dim=(image.shape[0], image.shape[1]), + patch_dim=patch_dim, + ) + augmented = transform( + image=image, bboxes=bboxes, mask=mask, class_labels=class_labels + ) + image, bboxes, mask, class_labels = ( + augmented["image"], + augmented["bboxes"], + augmented["mask"], + augmented["class_labels"], + ) + + return image, bboxes, mask, class_labels + + +class SemanticSegmentationResizeDataset(torch.utils.data.Dataset): + def __init__(self, coco: CocoDataset, resize_dim: Tuple[int, int]): + self.coco = coco + self.resize_dim = resize_dim + + def __getitem__(self, idx) -> SemanticSegmentationItem: + main_annotation = self.coco.annotations[idx] + image_id: int = main_annotation["image_id"] + ( + image, + bboxes, + mask, + class_labels, + ) = self.coco.get_semantic_segmentation_info_for_image(image_id) + + if image.shape[:2] != self.resize_dim: + transform = A.Compose( + [ + A.Resize(self.resize_dim[0], self.resize_dim[1]), + ], + bbox_params=A.BboxParams( + format="pascal_voc", label_fields=["class_labels"] + ), + ) + + augmented = transform( + image=image, + bboxes=bboxes, + mask=mask, + class_labels=class_labels, + ) + + image, bboxes, mask, class_labels = ( + augmented["image"], + augmented["bboxes"], + augmented["mask"], + augmented["class_labels"], + ) + + return image, bboxes, mask, class_labels + + def __len__(self): + return len(self.coco.annotations) + + +class SemanticSegmentationStochasticPatchingDataset(torch.utils.data.Dataset): + def __init__( + self, + patch_filepath: str, + mask_filepath: str, + augmentation: Callable = None, + preprocessing: Callable = None, + ): + """ + + Parameters + ---------- + patch_filepath : str + Filepath Pattern to the patch folder + mask_filepath : str + Filepath to the mask folder + augmentation : Callable + Augmentation function with signature for image, bboxes, and mask + preprocessing : Callable + Preprocessing function with signature for image, bboxes, and mask + """ + self.augmentation = augmentation + self.preprocessing = preprocessing + + self.patch_paths = glob.glob(patch_filepath) + self.mask_filepath = mask_filepath + + # self.cache: Dict[int, (np.ndarray, np.ndarray)] = {} + + def __getitem__(self, idx): + # Cache Miss + # if idx not in self.cache: + patch_path = self.patch_paths[idx] + patch_name = patch_path.split("/")[-1] + mask_path = f"{self.mask_filepath}/{patch_name}" + + patch_image = Image.open(patch_path).convert("RGB") + patch_image = np.array(patch_image).astype("float") / 255 + + mask_image = Image.open(mask_path).convert("RGB") + mask_image = mask_image.split()[0] # take only red channel + mask_image = np.array(mask_image).astype("int") + + self.cache[idx] = (patch_image, mask_image) + # Cache Hit + # else: + # patch_image, mask_image = self.cache[idx] + + # apply augmentations via albumentations + if 
self.augmentation: + sample = self.augmentation(image=patch_image, mask=mask_image) + patch_image, mask_image = ( + sample["image"], + sample["mask"], + ) + + # apply preprocessing + if self.preprocessing: + pass + + # PyTorch images should be in (channel, width, height) + # Images are normally in (width, height, channels + patch_image = np.moveaxis(patch_image, [0, 1, 2], [1, 2, 0]) + + return ( + torch.from_numpy(patch_image).float(), + torch.from_numpy(mask_image).long(), + ) + + def __len__(self): + return len(self.patch_paths) + + +class SemanticSegmentationWithDeterministicPatchingDataset( + torch.utils.data.Dataset +): + def __init__(self, coco: CocoDataset, patch_dim: Tuple[int, int]): + self.coco = coco + self.patch_dim = patch_dim + + def __getitem__(self, annotation_idx): + main_annotation = self.coco.annotations[annotation_idx] + image_id: int = main_annotation["image_id"] + ( + image, + bboxes, + mask, + class_labels, + ) = self.coco.get_semantic_segmentation_info_for_image(image_id) + + bbox_to_center_on_idx = 0 + annotations = self.coco.annotations_by_image_id[image_id] + for idx, ann in enumerate(annotations): + if ann["id"] == main_annotation["id"]: + bbox_to_center_on_idx = idx + break + logging.info( + f"Number of Bounding Boxes in Image for Annotation at index {annotation_idx}: {len(bboxes)}" + ) + bbox_to_center_on: BoundingBox = bboxes[bbox_to_center_on_idx] + transform = construct_center_crop_on_bbox_transform( + bbox_to_center_on, + image_dim=(image.shape[0], image.shape[1]), + patch_dim=self.patch_dim, + ) + + augmented = transform( + image=image, bboxes=bboxes, mask=mask, class_labels=class_labels + ) + + image, bboxes, mask, class_labels = ( + augmented["image"], + augmented["bboxes"], + augmented["mask"], + augmented["class_labels"], + ) + + return image, bboxes, mask, class_labels + + def __len__(self): + return len(self.coco.annotations) + + +class SemanticSegmentationDatasetFullCoverage: + """Semantic Segmentation Dataset Strategy to cover the full image + + This dataset breaks down high resolution images into a series of cropped images. + This allows a high resolution image that does not fit on the GPU to be loaded + sequentially over several batches. + + This dataset is intended to be used for validation / inferencing where we would + like to cover the entirety of the image. 
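To make the full-coverage indexing concrete, here is a small standalone sketch of the tiling idea with illustrative numbers; it mirrors the edge clamping used in `__getitem__` below but is not the class itself.

```python
import math


def tile_windows(height: int, width: int, patch_h: int, patch_w: int):
    # Enumerate crop windows (x1, y1, x2, y2) that jointly cover the whole image
    windows = []
    for row in range(math.ceil(height / patch_h)):
        for col in range(math.ceil(width / patch_w)):
            x1, y1 = col * patch_w, row * patch_h
            x2, y2 = x1 + patch_w, y1 + patch_h
            # Clamp the last column/row so patches never run past the image border
            if x2 > width:
                x1, x2 = width - patch_w, width
            if y2 > height:
                y1, y2 = height - patch_h, height
            windows.append((x1, y1, x2, y2))
    return windows


# A 1000 x 1500 image tiled with 512 x 512 patches needs 2 x 3 = 6 crops
print(tile_windows(1000, 1500, 512, 512))
```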
+ """ + + def __init__( + self, + coco: CocoDataset, + patch_dim: Tuple[int, int], + ): + self.coco = coco + self.patch_dim = patch_dim + + # Calculate number of items that will be in the dataset + n_images = len(self.coco.images) + height: int = int(self.coco.images[0]["height"]) + width: int = int(self.coco.images[0]["width"]) + len_sequence = math.ceil(height / self.patch_dim[0]) * math.ceil( + width / self.patch_dim[1] + ) + self.length = n_images * len_sequence + + def __getitem__(self, idx) -> Tuple[np.ndarray, np.ndarray]: + image_idx: int = idx % len(self.coco.images) + sequence_idx: int = int(idx / len(self.coco.images)) + + image_id: int = self.coco.images[image_idx]["id"] + ( + image, + bboxes, + mask, + class_labels, + ) = self.coco.get_semantic_segmentation_info_for_image(image_id) + + height, width, _ = image.shape + n_windows_vertical = math.ceil(height / self.patch_dim[0]) + n_windows_horizontal = math.ceil(width / self.patch_dim[1]) + + # + x1 = self.patch_dim[1] * (sequence_idx % n_windows_horizontal) + y1 = self.patch_dim[0] * (sequence_idx // n_windows_vertical) + x2 = x1 + self.patch_dim[1] + y2 = y1 + self.patch_dim[0] + + if x2 > width: + x1 = width - self.patch_dim[1] + x2 = width + if y2 > height: + y1 = height - self.patch_dim[0] + y2 = height + + transform = A.Compose( + [ + A.Crop(x1, y1, x2, y2), + ], + bbox_params=A.BboxParams( + format="pascal_voc", label_fields=["class_labels"] + ), + ) + augmented = transform( + image=image, bboxes=bboxes, mask=mask, class_labels=class_labels + ) + + image, bboxes, mask, class_labels = ( + augmented["image"], + augmented["bboxes"], + augmented["mask"], + augmented["class_labels"], + ) + + return image, bboxes, mask, class_labels + + def __len__(self): + return self.length + + +class SemanticSegmentationPyTorchDataset(torch.utils.data.Dataset): + + _available_patch_strategies = set( + ["resize", "deterministic_center_crop", "crop_all"] + ) + + # NC24sv3 Azure VMs have 440GiB of RAM + # This allows the SemanticSegmentationPyTorchDataset to be stored in memory + # However, when multiple workers are used in PyTorch Dataloader, + # a separate deepcopy of the dataset is made per instance + # Thus, disk is currently the only shared memory pool between processes + _available_cache_strategies = set([None, "none", "disk"]) + + def __init__( + self, + dataset: Dataset, + labels_filepath: str, + classes: List[int], + annotation_format: str, + root_dir: str, + cache_dir: Optional[str] = None, + augmentation: Optional[Callable] = None, + preprocessing: Optional[Callable] = None, + patch_strategy: str = "deterministic_center_crop", + patch_dim: Optional[Tuple[int, int]] = None, + resize_dim: Optional[Tuple[int, int]] = None, + cache_strategy: Optional[str] = None, + ): + if ( + patch_strategy + not in SemanticSegmentationPyTorchDataset._available_patch_strategies + ): + raise ValueError( + f"Parameter `patch_strategy` must be one of {self._available_patch_strategies}" + ) + + if ( + cache_strategy + not in SemanticSegmentationPyTorchDataset._available_cache_strategies + ): + raise ValueError( + f"Parameter `cache_strategy` must be one of {self._available_cache_strategies}" + ) + + if patch_strategy == "resize" and resize_dim is None: + raise ValueError( + 'Parameter `resize_dim` must not be None if `patch_strategy` is "resize"' + ) + elif ( + patch_strategy == "deterministic_center_crop" and patch_dim is None + ): + raise ValueError( + 'Parameter `patch_dim` must not be None if `patch_strategy` is "deterministic_center_crop"' + ) + elif 
patch_strategy == "crop_all" and patch_dim is None: + raise ValueError( + 'Parameter `patch_dim` must not be None if `patch_strategy is "crop_all"' + ) + + if patch_strategy == "resize": + self.dataset = SemanticSegmentationResizeDataset( + dataset, resize_dim + ) + elif patch_strategy == "deterministic_center_crop": + self.dataset = ( + SemanticSegmentationWithDeterministicPatchingDataset( + dataset, patch_dim + ) + ) + elif patch_strategy == "crop_all": + self.dataset = SemanticSegmentationDatasetFullCoverage( + dataset, patch_dim + ) + + self.root_dir = root_dir + self.cache_dir = cache_dir + self.augmentation = augmentation + self.preprocessing = preprocessing + + self.cache_strategy = cache_strategy + + def _get_cache_filepath_for_disk(self, idx: int): + cache_filepath = Path(join(self.cache_dir, f"{idx}.npz")) + return cache_filepath + + def _read_item_from_disk(self, idx: int) -> SemanticSegmentationItem: + cache_filepath = self._get_cache_filepath_for_disk(idx) + + loaded = np.load(cache_filepath) + image, bboxes, mask, class_labels = ( + loaded["image"], + loaded["bboxes"], + loaded["mask"], + loaded["class_labels"], + ) + return image, bboxes, mask, class_labels + + def _write_item_to_disk( + self, + idx: int, + image: np.ndarray, + bboxes: List[BoundingBox], + mask: np.ndarray, + class_labels: List[int], + ): + cache_filepath = self._get_cache_filepath_for_disk(idx) + + cache_filepath.parent.mkdir(parents=True, exist_ok=True) + np.savez_compressed( + cache_filepath, + image=image, + bboxes=bboxes, + mask=mask, + class_labels=class_labels, + ) + + def __getitem__(self, idx: int) -> Tuple[np.ndarray, np.ndarray]: + """ """ + if ( + self.cache_strategy == "disk" + and self._get_cache_filepath_for_disk(idx).exists() + ): + image, bboxes, mask, class_labels = self._read_item_from_disk(idx) + else: + image, bboxes, mask, class_labels = self.dataset[idx] + + # Minimal memory needed + image = image.astype("float32") + mask = mask.astype("int8") + + # apply preprocessing + if self.preprocessing: + sample = self.preprocessing(image=image, mask=mask) + image, mask = ( + sample["image"], + sample["mask"], + ) + + if self.cache_strategy == "disk": + self._write_item_to_disk( + idx, image, bboxes, mask, class_labels + ) + + # apply augmentations via albumentations + if self.augmentation: + # Currently albumentations CropNonEmptyMaskIfExists does not support bboxes so we do not use below + # augment bboxes and class_labels + # GitHub Issue: https://github.com/albumentations-team/albumentations/issues/461 + sample = self.augmentation(image=image, mask=mask) + image, mask = ( + sample["image"], + sample["mask"], + ) + + # PyTorch images should be in (channel, height, width) + # Images are normally in (height, width, channels) + image = np.moveaxis(image, [0, 1, 2], [1, 2, 0]) + + return image, mask + + def __len__(self): + return len(self.dataset) + + +class ToySemanticSegmentationDataset(torch.utils.data.Dataset): + """Toy semantic segmentation dataset for integration testing purposes""" + + def __init__(self, *args, **kwargs): + self._dataset = SemanticSegmentationPyTorchDataset(*args, **kwargs) + + def __getitem__(self, idx): + return self._dataset[idx] + + def __len__(self): + return min(len(self._dataset), 8) diff --git a/contrib/segmentation/src/losses/__init__.py b/contrib/segmentation/src/losses/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/segmentation/src/losses/loss.py b/contrib/segmentation/src/losses/loss.py new file mode 100644 index 
000000000..ddd290b82 --- /dev/null +++ b/contrib/segmentation/src/losses/loss.py @@ -0,0 +1,73 @@ +from typing import Dict, List, Tuple + +import numpy as np +import torch +from joblib import Parallel, delayed +from torch.utils.data.dataset import Dataset + + +def semantic_segmentation_class_balancer(dataset: Dataset) -> torch.Tensor: + """Semantic Segmentation Class Balancer + + Parameters + ---------- + dataset : Dataset + PyTorch Dataset to balance classes for + + Returns + ------- + weights : Tensor + Tensor of size C corresponding to the number of classes + """ + + def f(i): + class_id_to_class_counts = {} + _, mask = dataset[i] + # Long running function for mask size + classes, class_counts = np.unique(mask, return_counts=True) + size = len(mask.reshape(-1)) + + for j in range(len(classes)): + if classes[j] not in class_id_to_class_counts: + class_id_to_class_counts[classes[j]] = class_counts[j] + else: + class_id_to_class_counts[classes[j]] += class_counts[j] + + return (class_id_to_class_counts, size) + + # Calculate the number of each class (by pixel count) in each mask in parallel + class_count_info: List[Tuple[Dict, int]] = Parallel(n_jobs=-1)( + delayed(f)(i) for i in range(len(dataset)) + ) + + class_id_to_total_counts = {} + total_size = 0 + + # Synchronized summation over the class counts and size of mask + for class_id_to_class_counts, size in class_count_info: + for class_id, count in class_id_to_class_counts.items(): + class_id = int(class_id) + if class_id not in class_id_to_total_counts: + class_id_to_total_counts[class_id] = count + else: + class_id_to_total_counts[class_id] += count + total_size += size + + # Normalize the class counts based on the total size + class_id_to_total_counts = { + class_id: count / total_size + for class_id, count in class_id_to_total_counts.items() + } + + # Weight scaling calculation. 
It should be inversely proportional to the number + # of each class + weights_length = int(max(class_id_to_total_counts.keys())) + 1 + weights = [0] * weights_length + for class_id in range(weights_length): + if class_id not in class_id_to_total_counts: + weights[class_id] = 0 + else: + # Weights should be scaled larger for those with lower counts + weights[class_id] = 1 - class_id_to_total_counts[class_id] + + return torch.Tensor(weights) diff --git a/contrib/segmentation/src/metrics/__init__.py b/contrib/segmentation/src/metrics/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/segmentation/src/metrics/metrics.py b/contrib/segmentation/src/metrics/metrics.py new file mode 100644 index 000000000..433bd179f --- /dev/null +++ b/contrib/segmentation/src/metrics/metrics.py @@ -0,0 +1,98 @@ +import logging +from typing import Dict, List + +import torch +from torchmetrics import F1, Accuracy, IoU, MetricCollection, Precision, Recall + + +def get_semantic_segmentation_metrics( + num_classes: int, thresholds: List[float] = [0.5, 0.3] +) -> MetricCollection: + """Construct MetricCollection of Segmentation Metrics + + Parameters + ---------- + num_classes : int + Number of classes + thresholds : List[float] + List of thresholds for different IOU computing + + Returns + ------- + metrics : torchmetrics.MetricCollection + Collection of Segmentation metrics + """ + metrics = { + "mean_accuracy": Accuracy(num_classes=num_classes, mdmc_average="global"), + "per_class_accuracy": Accuracy( + num_classes=num_classes, average="none", mdmc_average="global" + ), + "mean_precision": Precision(num_classes=num_classes, mdmc_average="global"), + "per_class_precision": Precision( + num_classes=num_classes, average="none", mdmc_average="global" + ), + "mean_recall": Recall(num_classes=num_classes, mdmc_average="global"), + "per_class_recall": Recall( + num_classes=num_classes, average="none", mdmc_average="global" + ), + "mean_f1": F1(num_classes=num_classes, mdmc_average="global"), + "per_class_f1": F1( + num_classes=num_classes, average="none", mdmc_average="global" + ), + } + + for threshold in thresholds: + threshold_string = str(threshold).replace(".", "_") + metrics[f"mean_iou_{threshold_string}"] = IoU( + num_classes=num_classes, reduction="elementwise_mean", threshold=threshold + ) + metrics[f"per_class_iou_{threshold_string}"] = IoU( + num_classes=num_classes, reduction="none", threshold=threshold + ) + + print(metrics) + + return MetricCollection(metrics) + + +def log_metrics(results: Dict[str, torch.Tensor], classes: List[str], split: str): + """Log metrics to stdout and AML + + Parameters + ---------- + results : Dict + Key is the name of the metric, value is a metric tensor + If the metric is a mean, it is a 0-dim tensor + If the metric is per class, it is a C-dim tensor (C for number of classes) + split : {"train", "val", "test"} + Split that the metrics are for + """ + # Import does not appear to work on some non-AML environments + from azureml.core.run import Run, _SubmittedRun + + # Get script logger + log = logging.getLogger(__name__) + + # Get AML context + run = Run.get_context() + + split = split.capitalize() + + for metric_name, result in results.items(): + log_name = f"[{split}] {metric_name}" + if "mean" in metric_name: + result = float(result) + + if isinstance(run, _SubmittedRun): + run.parent.log(log_name, result) + run.log(log_name, result) + elif "per_class" in metric_name: + result = {c: float(r) for c, r in zip(classes, result)} + + # Steps are children 
the experiment they belong to so the parent + # also needs a log + if isinstance(run, _SubmittedRun): + run.parent.log_row(log_name, **result) + run.log_row(log_name, **result) + + log.info(f"{log_name}: {result}") diff --git a/contrib/segmentation/src/models/__init__.py b/contrib/segmentation/src/models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/segmentation/src/models/deeplabv3.py b/contrib/segmentation/src/models/deeplabv3.py new file mode 100644 index 000000000..e64db23c2 --- /dev/null +++ b/contrib/segmentation/src/models/deeplabv3.py @@ -0,0 +1,17 @@ +import torchvision +from torchvision.models.segmentation.deeplabv3 import DeepLabHead + + +def get_deeplabv3(n_classes: int, pretrained: bool = False, is_feature_extracting: bool = False): + model = torchvision.models.segmentation.deeplabv3_resnet50( + pretrained=pretrained, aux_loss=True + ) + + if is_feature_extracting: + for param in model.parameters(): + param.requires_grad = False + + model.classifier = DeepLabHead(2048, n_classes) + model.aux_classifier = DeepLabHead(1024, n_classes) + + return model \ No newline at end of file diff --git a/contrib/segmentation/src/models/fcn_resnet50.py b/contrib/segmentation/src/models/fcn_resnet50.py new file mode 100644 index 000000000..3a7455221 --- /dev/null +++ b/contrib/segmentation/src/models/fcn_resnet50.py @@ -0,0 +1,27 @@ +import torchvision +from torchvision.models.segmentation.fcn import FCNHead + + +def get_fcn_resnet50(n_classes, pretrained=True, is_feature_extracting: bool = False): + """Load Fully Convolutional Network with ResNet-50 backbone + + Parameters + ---------- + n_classes : int + Number of classes + pretrained : bool + True if model should use pre-trained weights from COCO + is_feature_extracting : bool + True if the convolutional layers should be set to non-trainable retaining their original + parameters + """ + model = torchvision.models.segmentation.fcn_resnet50(pretrained=pretrained) + + if is_feature_extracting: + for param in model.parameters(): + param.requires_grad = False + + model.classifier = FCNHead(2048, n_classes) + model.aux_classifier = FCNHead(1024, n_classes) + + return model diff --git a/contrib/segmentation/src/preprocessing/__init__.py b/contrib/segmentation/src/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/segmentation/src/preprocessing/bbox.py b/contrib/segmentation/src/preprocessing/bbox.py new file mode 100644 index 000000000..2e5863fd9 --- /dev/null +++ b/contrib/segmentation/src/preprocessing/bbox.py @@ -0,0 +1,236 @@ +import warnings +from dataclasses import dataclass +from typing import List, Tuple, Union + + +@dataclass +class CocoBoundingBox: + """BoundingBox in the standard COCO format + + Each attribute is in absolute coordinates to the size of the original image + + Parameters + ---------- + x : float + x-coordinate of the top left of the bounding box + y : float + y-coordinate of the top left of the bounding box + width : float + Width of the bounding box + height : float + Height of the bounding box + absolute : bool + True if in absolute coordinates which corresponds to the actual pixel amount + False if in normalized coordinates which is normalized to the image's current size + """ + + x: float + y: float + width: float + height: float + absolute: bool + + +@dataclass +class PascalVocBoundingBox: + """Bounding Box in Pascal VOC format + + Parameters + ---------- + x1 : float + x-coordinate of the top left of the bounding box + y1 : float + 
y-coordinate of the top left of the bounding box + x2 : float + x-coordinate of the bottom right of the bounding box + y2 : float + y-coordinate of the bottom right of the bounding box + absolute : bool + True if in absolute coordinates which corresponds to the actual pixel amount + False if in normalized coordinates which is normalized to the image's current size + """ + + x1: float + y1: float + x2: float + y2: float + absolute: bool + + +def coco_to_pascal_voc( + coco_bbox: Tuple[int, int, int, int] +) -> Tuple[float, float, float, float]: + """COCO to Pascal VOC Data Format Conversion + + COCO Bounding Box: (x-top-left, y-top-left, width, height) + Pascal VOC Bounding Box: (x-top-left, y-top-left, x-bottom-right, y-bottom-right) + + Parameters + ---------- + bbox : tuple + COCO Bounding Box + + Returns + ------- + pascal_voc_bbox : tuple + Pascal VOC Bounding Box + """ + x_top_left, y_top_left, width, height = coco_bbox + + x_bottom_right = x_top_left + width + y_bottom_right = y_top_left + height + + return x_top_left, y_top_left, x_bottom_right, y_bottom_right + + +def pascal_voc_to_coco(bbox: Union[Tuple, PascalVocBoundingBox]): + if isinstance(bbox, tuple): + bbox = PascalVocBoundingBox(bbox[0], bbox[1], bbox[2], bbox[3], True) + + width = bbox.x2 - bbox.x1 + height = bbox.y2 - bbox.y1 + + return bbox.x1, bbox.y1, width, height + + +def normalized_pascal_voc_bbox_to_abs_bbox( + bbox: Union[Tuple, PascalVocBoundingBox], image_width: int, image_height: int +) -> PascalVocBoundingBox: + """ + Pascal VOC Bounding Box with normalized coordinates (percentages based on image size) + to absolute coordinates + + Parameters + ---------- + bbox : tuple or PascalVocBoundingBox + Bounding Box in Pascal VOC format with normalized coordinates + image_width : int + Width of image to use for absolute coordinates + image_height : int + Height of image to use for absolute coordinates + + Returns + ------- + bbox : PascalVocBoundingBox + Bounding Box with absolute coordinates based on the image + """ + if isinstance(bbox, tuple): + bbox = PascalVocBoundingBox(bbox[0], bbox[1], bbox[2], bbox[3], True) + + bbox.x1 *= image_width + bbox.y1 *= image_height + bbox.x2 *= image_width + bbox.y2 *= image_height + + return bbox + + +def abs_bbox_to_normalized_pascal_voc( + bbox: Union[Tuple, PascalVocBoundingBox], image_width: int, image_height: int +) -> PascalVocBoundingBox: + """ + Pascal VOC Bounding Box with normalized coordinates (percentages based on image size) + to absolute coordinates + + Parameters + ---------- + bbox : tuple or PascalVocBoundingBox + Bounding Box in Pascal VOC format with normalized coordinates + image_width : int + Width of image to use for absolute coordinates + image_height : int + Height of image to use for absolute coordinates + + Returns + ------- + bbox : PascalVocBoundingBox + Bounding Box with absolute coordinates based on the image + """ + if isinstance(bbox, tuple): + bbox = PascalVocBoundingBox(bbox[0], bbox[1], bbox[2], bbox[3], True) + + bbox.x1 /= image_width + bbox.y1 /= image_height + bbox.x2 /= image_width + bbox.y2 /= image_height + + return bbox + + +def convert_bbox( + bbox: Union[Tuple, CocoBoundingBox, PascalVocBoundingBox], + source_format: str, + target_format: str, + image_width: int = None, + image_height: int = None, +) -> List[float]: + """Convert a Bounding Box to another format + + Parameters + ---------- + bbox : tuple or CocoBoundingBox or PascalVocBoundingBox + Bounding box to convert to a different format + source_format : {'coco', 'pascal_voc', 
'aml_coco'} + Format of `bbox` + target_format : {'coco', 'pascal_voc', 'aml_coco'} + Format to convert `bbox` to + image_width : int + Width of the image that `bbox` is for. + Required if source_format or target_format is 'aml_coco' + image_height : int + Height of the image that `bbox` is for + Required if source_format or target_format is 'aml_coco' + + Returns + ------- + bbox : list of float + Bounding Box in format specified by `target_format` + + Raises + ------ + ValueError + If source_format or target_format is not one of 'coco', 'pascal_voc', or 'aml_coco' + NotImplementedError + """ + bbox_formats = set(["coco", "pascal_voc", "aml_coco"]) + if source_format not in bbox_formats: + raise ValueError(f"Invalid source_format. Expected one of: {bbox_formats}") + if target_format not in bbox_formats: + raise ValueError(f"Invalid target_format. Expected one of: {bbox_formats}") + + if source_format == "aml_coco" or target_format == "aml_coco": + if image_width is None or image_height is None: + raise ValueError( + "If source_format or target_format is 'aml_coco' then image_width and image_height must be specified" + ) + + if source_format == target_format: + warnings.warn( + "Parameter source_format and target_format are the same. No conversion was necessary" + ) + return bbox + + # The intermediate bounding box should always be converted to "pascal_voc" + # This allows the many to many conversion between all bounding box types to be done + # through the implementation of one conversion to and from "pascal_voc" + + if source_format == "coco": + raise NotImplementedError + elif source_format == "pascal_voc": + raise NotImplementedError + elif source_format == "aml_coco": + bbox = coco_to_pascal_voc(bbox) + bbox = normalized_pascal_voc_bbox_to_abs_bbox( + bbox, image_width=image_width, image_height=image_height + ) + + if target_format == "pascal_voc": + pass + elif target_format == "coco": + bbox = [bbox.x1, bbox.y1, bbox.x2, bbox.y2] + elif target_format == "aml_coco": + bbox = abs_bbox_to_normalized_pascal_voc( + bbox, image_width=image_width, image_height=image_height + ) + + return bbox diff --git a/contrib/segmentation/src/preprocessing/coco.py b/contrib/segmentation/src/preprocessing/coco.py new file mode 100644 index 000000000..9797cb090 --- /dev/null +++ b/contrib/segmentation/src/preprocessing/coco.py @@ -0,0 +1,92 @@ +""" +Utilities for working with COCO json files +""" +from typing import Dict + +import pandas as pd + + +def coco_json_to_pandas_dataframe(coco_json: Dict) -> pd.DataFrame: + """Serialize COCO json to pandas dataframe + + Parameters + ---------- + coco_json : Dict + JSON in COCO format + + Returns + ------- + annotations_df : pd.DataFrame + DataFrame with `images`, `annotations`, and `categories` information from the COCO file + """ + + # Images section + images_df = pd.DataFrame(coco_json["images"]) + images_df = images_df.rename( + columns={"id": "image_id", "file_name": "filepath"} + ) + + # Categories section + categories_df = pd.DataFrame(coco_json["categories"]) + categories_df = categories_df.rename( + columns={"id": "category_id", "name": "category_name"} + ) + + # Annotations section + annotations_df = pd.DataFrame(coco_json["annotations"]) + annotations_df = annotations_df.merge(images_df, on="image_id") + annotations_df = annotations_df.merge(categories_df, on="category_id") + + return annotations_df + + +def pandas_dataframe_to_coco_json(annotations_df: pd.DataFrame) -> Dict: + """Serialize and write out a pandas dataframe into COCO json format + + 
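For orientation, a hedged round-trip sketch using the two helpers in this module: the tiny COCO dictionary below is invented (category name, sizes, and date included) and only carries the fields both helpers expect, and the `src.preprocessing.coco` import assumes the code is run from the project root like the rest of the package.

```python
from src.preprocessing.coco import (
    coco_json_to_pandas_dataframe,
    pandas_dataframe_to_coco_json,
)

coco_json = {
    "images": [
        {
            "id": 1,
            "width": 640,
            "height": 480,
            "file_name": "test.jpg",
            "coco_url": "test.jpg",
            "absolute_url": "some_blob_path/test.jpg",
            "date_captured": "2021-08-01T12:00:00Z",
        }
    ],
    "categories": [{"id": 1, "name": "example_category"}],
    "annotations": [
        {
            "id": 1,
            "image_id": 1,
            "category_id": 1,
            "bbox": [10, 20, 100, 50],
            "segmentation": [[10, 20, 110, 20, 110, 70]],
            "area": 5000,
        }
    ],
}

annotations_df = coco_json_to_pandas_dataframe(coco_json)   # one row per annotation
round_tripped = pandas_dataframe_to_coco_json(annotations_df)
assert [img["id"] for img in round_tripped["images"]] == [1]
```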
Parameters + ---------- + annotations_df : pd.DataFrame + DataFrame of annotations from a COCO json file + + Returns + ------- + coco_json : Dict + JSON representation of the annotations dataframe + """ + + images_df = annotations_df[ + [ + "image_id", + "width", + "height", + "filepath", + "coco_url", + "absolute_url", + "date_captured", + ] + ] + images_df = images_df.rename( + columns={"image_id": "id", "filepath": "file_name"} + ) + images_df = images_df.drop_duplicates() + images = images_df.to_dict(orient="records") + + categories_df = annotations_df[["category_id", "category_name"]] + categories_df = categories_df.rename( + columns={"category_id": "id", "category_name": "name"} + ) + categories_df = categories_df.drop_duplicates() + categories = categories_df.to_dict(orient="records") + + annotations_df = annotations_df[ + ["segmentation", "id", "category_id", "image_id", "area", "bbox"] + ] + annotations = annotations_df.to_dict(orient="records") + + coco_json = { + "images": images, + "annotations": annotations, + "categories": categories, + } + + return coco_json diff --git a/contrib/segmentation/src/preprocessing/segmentation.py b/contrib/segmentation/src/preprocessing/segmentation.py new file mode 100644 index 000000000..5d24ddbb0 --- /dev/null +++ b/contrib/segmentation/src/preprocessing/segmentation.py @@ -0,0 +1,218 @@ +import json +import warnings +from typing import List, Union + +import numpy as np +import pycocotools.mask as m +from pycocotools.coco import COCO + + +def absolute_mask_to_normalized_mask( + segmentation: List[float], width: int, height: int +): + """Convert segmentation map from absolute to normalized coordinates + + Parameters + ---------- + segmentation : list of float + Segmentation map in absolute coordinates + width : int + Width of image + height : int + Height of image + + Returns + ------- + segmentation : list of float + Segmentation map converted to normalized coordinates + """ + # This function treats the original copy of segmentation as immutable + segmentation = segmentation.copy() + + # Segmentation is a list of even length with every 2 entries being (x, y) coordinates + # of the next point to construct the polygon + for i in range(0, len(segmentation), 2): + segmentation[i] /= width + segmentation[i + 1] /= height + return segmentation + + +def normalized_mask_to_absolute_mask( + segmentation: List[float], width: int, height: int +) -> List[float]: + """Convert segmentation map from normalized to absolute coordinates + + Parameters + ---------- + segmentation : list of float + Segmentation map in normalized coordinates + width : int + Width of image + height : int + Height of image + + Returns + ------- + segmentation : list of float + Segmentation map converted to absolute coordinates + """ + # This function treats the original copy of segmentation as immutable + segmentation = segmentation.copy() + + # Segmentation is a list of even length with every 2 entries being (x, y) coordinates + # of the next point to construct the polygon + for i in range(0, len(segmentation), 2): + segmentation[i] = np.round(segmentation[i] * width) + segmentation[i + 1] = np.round(segmentation[i + 1] * height) + return segmentation + + +def convert_segmentation( + segmentation: List[Union[float, int]], + source_format: str, + target_format: str, + image_width: int, + image_height: int, +): + """Convert a Segmentation Map to another format + + Parameters + ---------- + segmentation : list of float or int + Segmentation map in format `source_format` + 
source_format : {'coco', 'aml_coco'} + Format of `segmentation` + target_format : {'coco', 'aml_coco'} + Format of `segmentation` + image_width : int + Width of the image that `segmentation` is for + image_height : int + Height of image that `segmentation` is for + + Returns + ------- + segmentation : list of float or int + Segmentation map converted to `target_format` + """ + mask_formats = set(["coco", "aml_coco", "yolo"]) + if source_format not in mask_formats: + raise ValueError(f"Invalid source_format. Expected one of {mask_formats}") + if target_format not in mask_formats: + raise ValueError(f"Invalid target_format. Expected one of {mask_formats}") + + if source_format == target_format: + warnings.warn( + "Parameter source_format and target_format are the same. No conversion was necessary" + ) + return segmentation + + # The intermediate segmentation mask will always be "coco" + if source_format == "aml_coco" or source_format == "yolo": + segmentation = normalized_mask_to_absolute_mask( + segmentation, width=image_width, height=image_height + ) + elif source_format == "coco": + # Our intermediate format is coco, so we don't need to do anything + pass + + if target_format == "aml_coco" or target_format == "yolo": + segmentation = absolute_mask_to_normalized_mask( + segmentation, width=image_width, height=image_height + ) + elif target_format == "coco": + pass + + return segmentation + + +def mask_generator(annotations_filepath): + mask_builder = MaskBuilder(annotations_filepath=annotations_filepath) + + for image_id, image_json in mask_builder.coco.imgs.items(): + yield (image_json, mask_builder.construct_mask_for_image(image_id)) + + +def mask_reader(mask_filepath: str) -> np.ndarray: + """Read mask from filesystem. + Masks are stored in RLE format so they are decoded before being returned + + Parameters + ---------- + mask_filepath : str + Filepath to read segmentation mask from + + Returns + ------- + mask : np.ndarray + Segmentation mask in 2D array + """ + mask_json = json.load(open(mask_filepath, "r")) + mask_json["counts"] = mask_json["counts"].encode() + mask: np.ndarray = m.decode(mask_json) + return mask + + +def mask_writer(mask: np.ndarray, mask_filepath: str): + """Write segmentation masks to filesystem + RLE is a lossless compression format that is well suited for segmentation masks which are + by nature sparse 2D arrays + + Parameters + ---------- + mask : np.ndarray + Segmentation mask to write out + mask_filepath : str + Filepath to write segmentation mask to + """ + mask = np.asfortranarray(mask) + mask_json = m.encode(mask) + mask_json["counts"] = mask_json["counts"].decode("utf-8") + json.dump(mask_json, open(mask_filepath, "w")) + + +class MaskBuilder: + def __init__(self, annotations_filepath): + self.coco = COCO(annotation_file=annotations_filepath) + self.category_ids = self.coco.cats.keys() + + def construct_mask_for_image(self, image_id: int) -> np.ndarray: + """Construct segmentation mask with all annotations for image with id `image_id` + + Parameters + ---------- + image_id : int + Id of image to construct mask for + + Returns + ------- + mask : np.ndarray + Mask array with same shape as image + Entires are the category_id for where there are instances of them + """ + annotation_ids = self.coco.getAnnIds( + imgIds=[image_id], catIds=self.category_ids + ) + annotations = self.coco.loadAnns(annotation_ids) + image = self.coco.imgs[image_id] + + # Initialize a zero mask + mask = np.zeros((int(image["height"]), int(image["width"])), dtype=np.uint8) + + # Add 
each annotation to the initial mask + for i in range(len(annotations)): + category_id = annotations[i]["category_id"] + + # The annotated mask is the same shape as our initial mask + # Entries with the value of "category_id" indicate the presence of that + # class in that location + annotated_mask = self.coco.annToMask(annotations[i]) + annotated_mask = annotated_mask * category_id + annotated_mask = annotated_mask.astype(np.uint8) + + # The masks are combined together as the 0 mask will always be overwritten by + # actual classes + # In the case of overlap, the class with the higher category_id is written + mask = np.maximum(annotated_mask, mask) + + mask = mask.astype(np.uint8) + return mask diff --git a/contrib/segmentation/src/train/__init__.py b/contrib/segmentation/src/train/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/segmentation/src/train/class_balancer.py b/contrib/segmentation/src/train/class_balancer.py new file mode 100644 index 000000000..11a875457 --- /dev/null +++ b/contrib/segmentation/src/train/class_balancer.py @@ -0,0 +1,73 @@ +from typing import Dict, List, Tuple + +import numpy as np +import torch +from joblib import Parallel, delayed +from torch.utils.data.dataset import Dataset + + +def semantic_segmentation_class_balancer(dataset: Dataset) -> torch.Tensor: + """Semantic Segmentation Class Balancer + + Parameters + ---------- + dataset : Dataset + PyTorch Dataset to balance classes for + + Returns + ------- + weights : Tensor + Tensor of size C corresponding to the number of classes + """ + + def f(i): + class_id_to_class_counts = {} + _, mask = dataset[i] + # Long running function for mask size + classes, class_counts = np.unique(mask, return_counts=True) + size = len(mask.reshape(-1)) + + for j in range(len(classes)): + if classes[j] not in class_id_to_class_counts: + class_id_to_class_counts[classes[j]] = class_counts[j] + else: + class_id_to_class_counts[classes[j]] += class_counts[j] + + return (class_id_to_class_counts, size) + + # Calculate the number of each class (by pixel count) in each mask in parallel + class_count_info: List[Tuple[Dict, int]] = Parallel(n_jobs=-1)( + delayed(f)(i) for i in range(len(dataset)) + ) + + class_id_to_total_counts = {} + total_size = 0 + + # Synchronized summation over the class counts and size of mask + for class_id_to_class_counts, size in class_count_info: + for class_id, count in class_id_to_class_counts.items(): + class_id = int(class_id) + if class_id not in class_id_to_total_counts: + class_id_to_total_counts[class_id] = count + else: + class_id_to_total_counts[class_id] += count + total_size += size + + # Normalize the class counts based on the total size + class_id_to_total_counts = { + class_id: count / total_size + for class_id, count in class_id_to_total_counts.items() + } + + # Weight scaling calculation. 
It should be inversely proportional to the number + # of each class + weights_length = int(max(class_id_to_total_counts.keys())) + 1 + weights = [0] * weights_length + for class_id in range(weights_length): + if class_id not in class_id_to_total_counts: + weights[class_id] = 0 + else: + # Weights should be scaled larger for those with lower counts + weights[class_id] = 1 - class_id_to_total_counts[class_id] + + return torch.Tensor(weights) \ No newline at end of file diff --git a/contrib/segmentation/src/visualization/__init__.py b/contrib/segmentation/src/visualization/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/segmentation/src/visualization/distribution.py b/contrib/segmentation/src/visualization/distribution.py new file mode 100644 index 000000000..a6546ade2 --- /dev/null +++ b/contrib/segmentation/src/visualization/distribution.py @@ -0,0 +1,112 @@ +from typing import Callable, List, Tuple + +import matplotlib.pyplot as plt +import numpy as np +from joblib import Parallel, delayed +from tqdm import tqdm + +from ..datasets.coco import CocoDataset +from ..datasets.coco_utils import annotation_to_mask_array + + +def annotation_size_distribution( + category: str, train_annotation_sizes, val_annotation_sizes, filter_func: Callable +): + fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(12, 6)) + + def filter_annotations(annotation_sizes): + annotation_sizes = list(filter(filter_func, annotation_sizes)) + annotation_sizes = [y[1] for y in annotation_sizes] + return annotation_sizes + + train_annotation_sizes = filter_annotations(train_annotation_sizes) + val_annotation_sizes = filter_annotations(val_annotation_sizes) + + max_bin = max(max(train_annotation_sizes), max(val_annotation_sizes)) + bins = np.logspace(np.log10(10), np.log10(max_bin), 50) + + axs[0].hist(train_annotation_sizes, bins=bins) + axs[0].set_title(f"[Train] {category} Annotation Size Distribution") + axs[0].set_xlabel("Annotation Size") + axs[0].set_xscale("log") + axs[0].set_ylabel("Count") + + axs[1].hist(val_annotation_sizes, bins=bins) + axs[1].set_title(f"[Val] {category} Annotation Size Distribution") + axs[1].set_xlabel("Annotation Size") + axs[1].set_xscale("log") + axs[1].set_ylabel("Count") + + fig.show() + + +def get_annotation_sizes(dataset: CocoDataset): + def f(i): + return get_annotation_size(dataset, i) + + class_id_and_annotation_size: List[Tuple[int, int]] = Parallel(n_jobs=-1)( + delayed(f)(i) for i in tqdm(range(len(dataset.annotations))) + ) + return class_id_and_annotation_size + + +def get_annotation_size(dataset: CocoDataset, annotation_idx: int): + """Get annotation size + + Parameters + ---------- + dataset : CocoDataset + Dataset with coco annotations + annotation_idx : int + Index for annotations + + Returns + ------- + class_id_to_annotation_size + """ + annotation = dataset.annotations[annotation_idx] + image_id: int = annotation["image_id"] + image_json = dataset.images_by_image_id[image_id] + mask = annotation_to_mask_array( + width=image_json["width"], + height=image_json["height"], + annotations=[annotation], + classes=dataset.classes, + annotation_format=dataset.annotation_format, + ) + class_id_to_annotation_size = get_mask_distribution(mask) + + # Each mask should only contain the annotation and potentially background + assert len(class_id_to_annotation_size) <= 2 + class_id_to_annotation_size = iter(class_id_to_annotation_size.items()) + + # First item is background, second is our annotation + (class_id, annotation_size) = 
next(class_id_to_annotation_size) + # Double checking above claim + if class_id == 0: + (class_id, annotation_size) = next(class_id_to_annotation_size) + + return class_id, annotation_size + + +def get_mask_distribution(mask: np.ndarray): + """Get distribution of labels (pixels) in mask + + Parameters + ---------- + mask : np.ndarray + Mask to get distribution for + + Returns + ------- + class_id_to_class_counts : Dict[int, int] + Mapping from class id to total number of pixels in mask for given class + """ + class_id_to_class_counts = {} + classes, class_counts = np.unique(mask, return_counts=True) + for i in range(len(classes)): + if classes[i] not in class_id_to_class_counts: + class_id_to_class_counts[classes[i]] = class_counts[i] + else: + class_id_to_class_counts[classes[i]] += class_counts[i] + return class_id_to_class_counts diff --git a/contrib/segmentation/tests/__init__.py b/contrib/segmentation/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/segmentation/tests/azure/__init__.py b/contrib/segmentation/tests/azure/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/segmentation/tests/azure/test_data_labeling.py b/contrib/segmentation/tests/azure/test_data_labeling.py new file mode 100644 index 000000000..04ec03f1c --- /dev/null +++ b/contrib/segmentation/tests/azure/test_data_labeling.py @@ -0,0 +1,12 @@ +from typing import Dict + +from src.azure.data_labeling import aml_coco_labels_to_standard_coco + + +def test_aml_coco_labels_to_standard_coco( + aml_labels_json: Dict, standard_labels_json: Dict +): + labels_json = aml_coco_labels_to_standard_coco(aml_labels_json) + + assert labels_json["annotations"] != aml_labels_json["annotations"] + assert labels_json == standard_labels_json diff --git a/contrib/segmentation/tests/conftest.py b/contrib/segmentation/tests/conftest.py new file mode 100644 index 000000000..0abf2e37a --- /dev/null +++ b/contrib/segmentation/tests/conftest.py @@ -0,0 +1,40 @@ +""" +Configuration file for pytest. 
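+Data-file fixtures resolve paths such as data/test_data/labels relative to
+contrib/segmentation, so pytest is expected to be run from that directory.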
+Contains a collection of fixture functions which are run when pytest starts +""" +import json +from os.path import join +from typing import Dict + +import pandas as pd +import pytest + +from src.preprocessing.coco import coco_json_to_pandas_dataframe + + +@pytest.fixture() +def aml_labels_filepath() -> str: + return join("data", "test_data", "labels", "aml_coco_labels.json") + + +@pytest.fixture() +def aml_labels_json(aml_labels_filepath: str) -> Dict: + return json.load(open(aml_labels_filepath, "r")) + + +@pytest.fixture() +def standard_labels_filepath() -> str: + return join("data", "test_data", "labels", "standard_coco_labels.json") + + +@pytest.fixture() +def standard_labels_json(standard_labels_filepath) -> Dict: + return json.load(open(standard_labels_filepath, "r")) + + +@pytest.fixture +def annotations_df(aml_labels_filepath: str) -> pd.DataFrame: + coco_json = json.load(open(aml_labels_filepath, "r")) + annotations_df = coco_json_to_pandas_dataframe(coco_json) + + return annotations_df diff --git a/contrib/segmentation/tests/datasets/__init__.py b/contrib/segmentation/tests/datasets/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/segmentation/tests/datasets/conftest.py b/contrib/segmentation/tests/datasets/conftest.py new file mode 100644 index 000000000..ac90a83a3 --- /dev/null +++ b/contrib/segmentation/tests/datasets/conftest.py @@ -0,0 +1,15 @@ +import numpy as np +import pytest +from PIL import Image + + +@pytest.fixture +def high_resolution_image() -> Image.Image: + height = 3632 + width = 5456 + channels = 3 + image: np.ndarray = np.random.randint( + low=0, high=256, size=height * width * channels, dtype=np.uint8 + ) + image = image.reshape((height, width, channels)) + return Image.fromarray(image) diff --git a/contrib/segmentation/tests/datasets/test_coco_utils.py b/contrib/segmentation/tests/datasets/test_coco_utils.py new file mode 100644 index 000000000..73da1b92c --- /dev/null +++ b/contrib/segmentation/tests/datasets/test_coco_utils.py @@ -0,0 +1,102 @@ +from typing import Dict, List, Tuple + +import numpy as np +import pytest + +from src.datasets.coco_utils import annotation_to_mask_array + + +def annotation_to_mask_array_cases() -> List[Tuple]: + width = 512 + height = 256 + category_id = 1 + top_left_mask: np.ndarray = np.zeros((height, width), dtype=np.uint8) + # Mask filling is assumed to be inclusive with the coordinates so + 1 + top_left_mask[: height // 2 + 1, : width // 2 + 1] = 1 + top_left_mask_annotation_coco_format = { + "segmentation": [ + [ + 0, + 0, + width // 2, + 0, + width // 2, + height // 2, + 0, + height // 2, + 0, + 0, + ] + ], + "id": 0, + "category_id": category_id, + "image_id": 1, + "area": 65536, + "bbox": [0, 0, width // 2, height // 2], + } + + top_left_mask_annotation_aml_coco_format = { + "segmentation": [ + [ + 0, + 0, + 0.5, + 0, + 0.5, + 0.5, + 0, + 0.5, + 0, + 0, + ] + ], + "id": 0, + "category_id": category_id, + "image_id": 1, + "area": 65536, + "bbox": [0, 0, 0.5, 0.5], + } + + top_left_mask_coco_format_case = ( + width, + height, + [top_left_mask_annotation_coco_format], + [category_id], + "coco", + top_left_mask.copy(), + ) + + top_left_mask_aml_coco_format_case = ( + width, + height, + [top_left_mask_annotation_aml_coco_format], + [category_id], + "aml_coco", + top_left_mask.copy(), + ) + + cases = [ + top_left_mask_coco_format_case, + top_left_mask_aml_coco_format_case, + ] + + return cases + + +@pytest.mark.parametrize( + "width, height, annotations, classes, annotation_format, 
expected_mask", + annotation_to_mask_array_cases(), +) +def test_annotation_to_mask_array( + width: int, + height: int, + annotations: List[Dict], + classes: List[int], + annotation_format: str, + expected_mask: np.ndarray, +): + mask = annotation_to_mask_array( + width, height, annotations, classes, annotation_format + ) + + assert np.array_equal(mask, expected_mask) diff --git a/contrib/segmentation/tests/datasets/test_semantic_segmentation.py b/contrib/segmentation/tests/datasets/test_semantic_segmentation.py new file mode 100644 index 000000000..6df79db2b --- /dev/null +++ b/contrib/segmentation/tests/datasets/test_semantic_segmentation.py @@ -0,0 +1,100 @@ +from typing import List, Optional, Tuple + +import numpy as np +import pytest +from PIL import Image + +from src.datasets.semantic_segmentation import ( + SemanticSegmentationPyTorchDataset, +) + + +@pytest.mark.parametrize( + "classes, annotation_format, patch_strategy, patch_dim, resize_dim, expected_length", + [ + ( + [1, 2, 3, 4], + "coco", + "deterministic_center_crop", + (256, 256), + None, + 44, + ), + ( + [1, 2, 3, 4], + "coco", + "crop_all", + (256, 256), + None, + 3300, + ), + ( + [2], + "coco", + "deterministic_center_crop", + (256, 256), + None, + 5, + ), + ], +) +def test_semantic_segmentation_dataset( + mocker, + high_resolution_image: Image.Image, + standard_labels_filepath: str, + classes: List[int], + annotation_format: str, + patch_strategy: str, + patch_dim: Optional[Tuple[int, int]], + resize_dim: Optional[Tuple[int, int]], + expected_length: int, +): + mocker.patch( + "src.datasets.semantic_segmentation.Image.open", + return_value=high_resolution_image, + ) + dataset = SemanticSegmentationPyTorchDataset( + standard_labels_filepath, + root_dir="data", + classes=classes, + annotation_format=annotation_format, + patch_strategy=patch_strategy, + patch_dim=patch_dim, + resize_dim=resize_dim, + ) + + assert len(dataset) == expected_length + + if patch_strategy == "crop_all": + h = high_resolution_image.height + w = high_resolution_image.width + + # Patches are taken in a grid like fashion over the image for crop_all + # Testing boundary cases within the grid + boundary_indexes = [ + 0, + h - 1, + h, + h + 1, + w - 1, + w, + w + 1, + h * w - 1, + h * w, + h * w + 1, + len(dataset) - 1, + ] + else: + boundary_indexes = [0, len(dataset) - 1] + + for idx in boundary_indexes: + image, mask = dataset[idx] + assert image.shape == (3,) + patch_dim + assert mask.shape == patch_dim + + # Fuzz-Testing random indexes + random_indexes = np.random.randint(0, len(dataset), size=10) + for idx in random_indexes: + image, mask = dataset[idx] + assert image.shape == (3,) + patch_dim + assert mask.shape == patch_dim diff --git a/contrib/segmentation/tests/losses/__init__.py b/contrib/segmentation/tests/losses/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/segmentation/tests/losses/conftest.py b/contrib/segmentation/tests/losses/conftest.py new file mode 100644 index 000000000..c1ed3f819 --- /dev/null +++ b/contrib/segmentation/tests/losses/conftest.py @@ -0,0 +1,25 @@ +import numpy as np +import pytest +from torch.utils.data.dataset import Dataset + + +class SemanticSegmentationTestDataset(Dataset): + def __init__(self, height, width): + self.height = height + self.width = width + + def __getitem__(self, idx): + image: np.ndarray = np.zeros((self.height, self.width, 3)) + mask: np.ndarray = np.zeros((self.height, self.width)) + + mask[: self.height // 2, : self.width // 2] = 4 + + return image, mask + + 
def __len__(self): + return 4 + + +@pytest.fixture +def semantic_segmentation_dataset(): + return SemanticSegmentationTestDataset(256, 256) diff --git a/contrib/segmentation/tests/losses/test_loss.py b/contrib/segmentation/tests/losses/test_loss.py new file mode 100644 index 000000000..c4d8102dd --- /dev/null +++ b/contrib/segmentation/tests/losses/test_loss.py @@ -0,0 +1,11 @@ +from torch.utils.data.dataset import Dataset + +from src.losses.loss import semantic_segmentation_class_balancer + + +def test_semantic_segmentation_class_balancer(semantic_segmentation_dataset: Dataset): + weights = semantic_segmentation_class_balancer(semantic_segmentation_dataset) + + assert len(weights) == 5 + assert weights[0] == 0.25 + assert weights[4] == 0.75 diff --git a/contrib/segmentation/tests/metrics/__init__.py b/contrib/segmentation/tests/metrics/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/segmentation/tests/metrics/test_metrics.py b/contrib/segmentation/tests/metrics/test_metrics.py new file mode 100644 index 000000000..da2e285bd --- /dev/null +++ b/contrib/segmentation/tests/metrics/test_metrics.py @@ -0,0 +1,191 @@ +from typing import Dict + +import pytest +import torch + +from src.metrics.metrics import get_semantic_segmentation_metrics + + +def get_semantic_segmentation_metrics_cases(): + num_classes = 2 + + pred_all_correct = torch.Tensor( + [ + [ + [ + [0.25, 0.25, 0.75, 0.75], + [0.25, 0.25, 0.75, 0.75], + [0.75, 0.75, 0.75, 0.75], + [0.75, 0.75, 0.75, 0.75], + ], + [ + [0.75, 0.75, 0.25, 0.25], + [0.75, 0.75, 0.25, 0.25], + [0.25, 0.25, 0.25, 0.25], + [0.25, 0.25, 0.25, 0.25], + ], + ] + ] + ) + + ground_truth_all_correct = torch.Tensor( + [ + [ + [1, 1, 0, 0], + [1, 1, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0], + ], + ] + ).int() + + expected_results_all_correct = { + "mean_accuracy": torch.tensor(1), + "per_class_accuracy": torch.Tensor([1, 1]), + "mean_precision": torch.tensor(1), + "per_class_precision": torch.Tensor([1, 1]), + "mean_recall": torch.tensor(1), + "per_class_recall": torch.Tensor([1, 1]), + "mean_f1": torch.tensor(1), + "per_class_f1": torch.Tensor([1, 1]), + "mean_iou_0_5": torch.tensor(1), + "per_class_iou_0_5": torch.Tensor([1, 1]), + "mean_iou_0_3": torch.tensor(1), + "per_class_iou_0_3": torch.Tensor([1, 1]), + } + + all_correct_case = ( + pred_all_correct, + ground_truth_all_correct, + num_classes, + expected_results_all_correct, + ) + + pred_none_correct = torch.Tensor( + [ + [ + [ + [0.25, 0.25, 0.75, 0.75], + [0.25, 0.25, 0.75, 0.75], + [0.75, 0.75, 0.75, 0.75], + [0.75, 0.75, 0.75, 0.75], + ], + [ + [0.75, 0.75, 0.25, 0.25], + [0.75, 0.75, 0.25, 0.25], + [0.25, 0.25, 0.25, 0.25], + [0.25, 0.25, 0.25, 0.25], + ], + ] + ] + ) + + ground_truth_none_correct = torch.Tensor( + [ + [ + [0, 0, 1, 1], + [0, 0, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 1], + ], + ] + ).int() + + expected_results_none_correct = { + "mean_accuracy": torch.tensor(0), + "per_class_accuracy": torch.Tensor([0, 0]), + "mean_precision": torch.tensor(0), + "per_class_precision": torch.Tensor([0, 0]), + "mean_recall": torch.tensor(0), + "per_class_recall": torch.Tensor([0, 0]), + "mean_f1": torch.tensor(0), + "per_class_f1": torch.Tensor([0, 0]), + "mean_iou_0_5": torch.tensor(0), + "per_class_iou_0_5": torch.Tensor([0, 0]), + "mean_iou_0_3": torch.tensor(0), + "per_class_iou_0_3": torch.Tensor([0, 0]), + } + + none_correct_case = ( + pred_none_correct, + ground_truth_none_correct, + num_classes, + expected_results_none_correct, + ) + + pred_typical_case = torch.Tensor( + [ 
+ [ + [ + [0.25, 0.25, 0.25, 0.25], + [0.25, 0.25, 0.25, 0.25], + [0.75, 0.75, 0.75, 0.75], + [0.75, 0.75, 0.75, 0.75], + ], + [ + [0.75, 0.75, 0.75, 0.75], + [0.75, 0.75, 0.75, 0.75], + [0.25, 0.25, 0.25, 0.25], + [0.25, 0.25, 0.25, 0.25], + ], + ] + ] + ) + + ground_truth_typical_case = torch.Tensor( + [ + [ + [1, 1, 0, 0], + [1, 1, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0], + ], + ] + ).int() + + expected_results_typical_case = { + "mean_accuracy": torch.tensor(0.75), + "per_class_accuracy": torch.Tensor([2 / 3, 1]), + "mean_precision": torch.tensor(0.75), + "per_class_precision": torch.Tensor([1, 0.5]), + "mean_recall": torch.tensor(0.75), + "per_class_recall": torch.Tensor([2 / 3, 1]), + "mean_f1": torch.tensor(0.75), + "per_class_f1": torch.Tensor([0.8, 2 / 3]), + "mean_iou_0_5": torch.tensor((2 / 3 + 1 / 2) / 2), + "per_class_iou_0_5": torch.Tensor([2 / 3, 0.5]), + "mean_iou_0_3": torch.tensor((2 / 3 + 1 / 2) / 2), + "per_class_iou_0_3": torch.Tensor([2 / 3, 0.5]), + } + + typical_case = ( + pred_typical_case, + ground_truth_typical_case, + num_classes, + expected_results_typical_case, + ) + + cases = [all_correct_case, none_correct_case, typical_case] + return cases + + +@pytest.mark.parametrize( + "preds, ground_truth, num_classes, expected_results", + get_semantic_segmentation_metrics_cases(), +) +def test_get_semantic_segmentation_metrics( + preds: torch.Tensor, + ground_truth: torch.Tensor, + num_classes: int, + expected_results: Dict, +): + metrics = get_semantic_segmentation_metrics(num_classes, thresholds=[0.5, 0.3]) + metrics(preds, ground_truth) + results = metrics.compute() + + assert len(results) > 0 + + for metric_name, result in results.items(): + if "per_class" in metric_name: + assert len(result) == num_classes + assert torch.allclose(result, expected_results[metric_name].float()) diff --git a/contrib/segmentation/tests/preprocessing/__init__.py b/contrib/segmentation/tests/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/segmentation/tests/preprocessing/test_bbox.py b/contrib/segmentation/tests/preprocessing/test_bbox.py new file mode 100644 index 000000000..06620a76d --- /dev/null +++ b/contrib/segmentation/tests/preprocessing/test_bbox.py @@ -0,0 +1,41 @@ +from typing import Tuple, Union + +import pytest + +from src.preprocessing.bbox import PascalVocBoundingBox, convert_bbox + + +@pytest.mark.parametrize( + "bbox, source_format, target_format, image_width, image_height, expected_bbox", + [ + ( + (0.1, 0.2, 0.3, 0.4), + "aml_coco", + "pascal_voc", + 480, + 270, + PascalVocBoundingBox(48, 54, 192, 162, True), + ), + ], +) +def test_convert_bbox( + bbox: Union[Tuple, PascalVocBoundingBox], + source_format: str, + target_format, + image_width: int, + image_height: int, + expected_bbox: Union[Tuple, PascalVocBoundingBox], +): + bbox = convert_bbox( + bbox, + source_format, + target_format, + image_width=image_width, + image_height=image_height, + ) + + if isinstance(expected_bbox, PascalVocBoundingBox): + assert pytest.approx(bbox.x1, 1e-8) == expected_bbox.x1 + assert pytest.approx(bbox.y1, 1e-8) == expected_bbox.y1 + assert pytest.approx(bbox.x2, 1e-8) == expected_bbox.x2 + assert pytest.approx(bbox.y2, 1e-8) == expected_bbox.y2 diff --git a/contrib/segmentation/tests/preprocessing/test_coco.py b/contrib/segmentation/tests/preprocessing/test_coco.py new file mode 100644 index 000000000..4d74994d8 --- /dev/null +++ b/contrib/segmentation/tests/preprocessing/test_coco.py @@ -0,0 +1,50 @@ +from typing import Dict + +import 
pandas as pd + +from src.preprocessing.coco import ( + coco_json_to_pandas_dataframe, + pandas_dataframe_to_coco_json, +) + + +def test_pandas_dataframe_to_coco_json(annotations_df: pd.DataFrame): + coco_json = pandas_dataframe_to_coco_json(annotations_df) + + # Check top level keys + assert "images" in coco_json.keys() + assert "annotations" in coco_json.keys() + assert "categories" in coco_json.keys() + + # Check for one image if the correct keys are there + image_json: Dict = coco_json["images"][0] + assert "id" in image_json.keys() + assert "width" in image_json.keys() + assert "height" in image_json.keys() + assert "file_name" in image_json.keys() + assert "coco_url" in image_json.keys() + assert "absolute_url" in image_json.keys() + assert "date_captured" in image_json.keys() + + # Check for one annotation if correct keys are there + annotation_json: Dict = coco_json["annotations"][0] + assert "segmentation" in annotation_json.keys() + assert "id" in annotation_json.keys() + assert "category_id" in annotation_json.keys() + assert "image_id" in annotation_json.keys() + assert "area" in annotation_json.keys() + assert "bbox" in annotation_json.keys() + + # Check for one category if correct keys are there + category_json: Dict = coco_json["categories"][0] + assert "id" in category_json.keys() + assert "name" in category_json.keys() + + +def test_coco_json_and_pandas_dataframe_conversion(annotations_df: pd.DataFrame): + serialized_annotations_json = pandas_dataframe_to_coco_json(annotations_df) + serialized_annotations_df = coco_json_to_pandas_dataframe( + serialized_annotations_json + ) + + assert annotations_df.equals(serialized_annotations_df) diff --git a/contrib/segmentation/tests/preprocessing/test_segmentation.py b/contrib/segmentation/tests/preprocessing/test_segmentation.py new file mode 100644 index 000000000..810cf5d8b --- /dev/null +++ b/contrib/segmentation/tests/preprocessing/test_segmentation.py @@ -0,0 +1,74 @@ +import tempfile +from os.path import join + +import numpy as np +import pytest + +from src.preprocessing.segmentation import ( + convert_segmentation, + mask_generator, + mask_reader, + mask_writer, +) + + +@pytest.mark.parametrize( + "segmentation, source_format, target_format, width, height, expected_segmentation", + [ + ( + [ + 0.6568766457362771, + 0.4782633500684619, + 0.6578894065221794, + 0.4727863989046098, + 0.6619404496657889, + 0.4764376996805112, + ], + "aml_coco", + "coco", + 5456, + 3632, + [3584, 1737, 3589, 1717, 3612, 1730], + ) + ], +) +def test_convert_segmentation( + segmentation, source_format, target_format, width, height, expected_segmentation +): + original_segmentation = segmentation.copy() + + converted_segmentation = convert_segmentation( + segmentation, + source_format=source_format, + target_format=target_format, + image_height=height, + image_width=width, + ) + + # Check that immutability was respected + assert segmentation == original_segmentation + assert converted_segmentation == expected_segmentation + + +def test_mask_generator(standard_labels_filepath: str, mocker): + mask_gen = mask_generator(annotations_filepath=standard_labels_filepath) + image_json, mask = next(mask_gen) + zeroes = np.zeros(mask.shape) + + assert "id" in image_json + assert "width" in image_json + assert "height" in image_json + + assert not np.array_equal(zeroes, mask) + + +def test_mask_reader_and_writer(standard_labels_filepath: str): + with tempfile.TemporaryDirectory() as tempdir: + mask_filepath = join(tempdir, "mask.json") + _, original_mask = 
next(mask_generator(standard_labels_filepath)) + + mask_writer(original_mask, mask_filepath) + mask = mask_reader(mask_filepath) + + assert np.array_equal(mask, original_mask) + assert mask.sum() > 0 diff --git a/contrib/segmentation/train.py b/contrib/segmentation/train.py new file mode 100644 index 000000000..7f219327f --- /dev/null +++ b/contrib/segmentation/train.py @@ -0,0 +1,412 @@ +""" +""" +import argparse +import copy +import logging +import multiprocessing +import time +import uuid +from os.path import join +from pathlib import Path +from typing import Dict, Tuple + +import albumentations as A +import torch +import torch.nn as nn +from torch.utils.data.dataloader import DataLoader + +from config.augmentation import preprocessing, augmentation +from src.datasets.semantic_segmentation import ( + SemanticSegmentationPyTorchDataset, + SemanticSegmentationStochasticPatchingDataset, + ToySemanticSegmentationDataset, +) +from src.losses.loss import semantic_segmentation_class_balancer +from src.metrics.metrics import get_semantic_segmentation_metrics, log_metrics +from src.models.deeplabv3 import get_deeplabv3 +from src.models.fcn_resnet50 import get_fcn_resnet50 + + +log = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ("yes", "true", "t", "y", "1"): + return True + elif v.lower() in ("no", "false", "f", "n", "0"): + return False + else: + raise argparse.ArgumentTypeError("Boolean value expected.") + + +class DeepLabModelWrapper(nn.Module): + def __init__( + self, n_classes: int, pretrained: bool, is_feature_extracting: bool + ): + super().__init__() + self.model = get_deeplabv3( + n_classes, + pretrained=pretrained, + is_feature_extracting=is_feature_extracting, + ) + + def forward(self, x): + with torch.cuda.amp.autocast(): + return self.model.forward(x) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument("--train-dir", type=str, required=True) + parser.add_argument("--val-dir", type=str, required=True) + parser.add_argument("--cache-dir", type=str, required=False, default=None) + parser.add_argument( + "--model-name", type=str, required=False, default="deeplab" + ) + parser.add_argument("--epochs", type=int, required=False, default=10) + parser.add_argument("--batch-size", type=int, required=False, default=2) + parser.add_argument( + "--learning-rate", type=float, required=False, default=0.001 + ) + parser.add_argument( + "--aux-loss-weight", type=float, required=False, default=0.4 + ) + parser.add_argument( + "--patch-strategy", + type=str, + required=False, + default="deterministic_center_crop", + ) + parser.add_argument( + "--val-patch-strategy", + type=str, + required=False, + default="", + ) + parser.add_argument("--toy", type=bool, required=False, default=False) + parser.add_argument("--classes", type=str, default="1, 2") + parser.add_argument( + "--log-file", type=str, required=False, default="train.log" + ) + parser.add_argument("--p-hflip", type=float, required=False, default=0.5) + parser.add_argument( + "--batch-validation-perc", type=float, required=False, default=1.0 + ) + parser.add_argument("--patch-dim", type=str, default="512, 512") + parser.add_argument("--resize-dim", type=str, default="3632, 5456") + parser.add_argument( + "--pretrained", required=False, type=str2bool, default=True + ) + parser.add_argument( + "--iou-thresholds", type=str, required=False, default="0.5, 0.3" + ) + parser.add_argument( + "--class-balance", 
type=str2bool, required=False, default=False + ) + parser.add_argument( + "--cache-strategy", type=str, required=False, default="none" + ) + args = parser.parse_args() + + fh = logging.FileHandler(str(args.log_file)) + log.addHandler(fh) + + train_dir = str(args.train_dir) + val_dir = str(args.val_dir) + + model_dir = join("outputs", "models") + Path(model_dir).mkdir(parents=True, exist_ok=True) + + if args.cache_dir is not None: + cache_dir = str(args.cache_dir) + else: + cache_dir = join("/tmp", str(uuid.uuid4())) + cache_strategy = str(args.cache_strategy) + + model_name = str(args.model_name) + n_epochs = int(args.epochs) + batch_size = int(args.batch_size) + learning_rate = float(args.learning_rate) + aux_loss_weight = float(args.aux_loss_weight) + patch_strategy = str(args.patch_strategy).lower() + val_patch_strategy = str(args.val_patch_strategy).lower() + is_toy = bool(args.toy) + + classes = [int(c) for c in args.classes.split(",")] + class_balance = bool(args.class_balance) + + batch_validation_perc = float(args.batch_validation_perc) + pretrained: bool = bool(args.pretrained) + + patch_dim: Tuple[int, int] = tuple( + [int(x) for x in args.patch_dim.split(",")] + ) + resize_dim: Tuple[int, int] = tuple( + [int(x) for x in args.resize_dim.split(",")] + ) + iou_thresholds = [float(x) for x in args.iou_thresholds.split(",")] + + # train on the GPU or on the CPU, if a GPU is not available + device = ( + torch.device("cuda") + if torch.cuda.is_available() + else torch.device("cpu") + ) + + log.info(f"Model Name: {model_name}") + log.info(f"Epochs: {n_epochs}") + log.info(f"Learning Rate: {learning_rate}") + log.info(f"Auxiliary Loss Weight: {aux_loss_weight}") + log.info(f"Batch Size: {batch_size}") + log.info(f"Patch Strategy: {patch_strategy}") + log.info(f"Classes: {classes}") + log.info(f"Toy: {is_toy}") + log.info(f"GPU: {torch.cuda.is_available()}") + log.info(f"Patch Dimension: {patch_dim}") + log.info(f"Resize Dimension: {resize_dim}") + log.info(f"Pretrained: {pretrained}") + + train_labels_filepath = join(train_dir, "train.json") + val_labels_filepath = join(val_dir, "val.json") + + # Toy Dataset for Integration Testing Purposes + Dataset = ( + SemanticSegmentationPyTorchDataset + if not is_toy + else ToySemanticSegmentationDataset + ) + + # Validation patch strategy may differ from train patch strategy + if val_patch_strategy == "": + if patch_strategy == "resize": + val_patch_strategy = "resize" + else: + val_patch_strategy = "crop_all" + + if patch_strategy == "stochastic": + dataset = SemanticSegmentationStochasticPatchingDataset( + f"{train_dir}/patch/*.png", + f"{train_dir}/mask", + augmentation=preprocessing, + ) + dataset_val = SemanticSegmentationStochasticPatchingDataset( + f"{val_dir}/patch/*.png", + f"{val_dir}/mask", + augmentation=preprocessing, + ) + + dataset = Dataset( + labels_filepath=train_labels_filepath, + classes=classes, + annotation_format="coco", + root_dir=train_dir, + cache_dir=join(cache_dir, "train"), + cache_strategy=cache_strategy, + # preprocessing=get_preprocessing(), + augmentation=augmentation, + patch_strategy=patch_strategy, + patch_dim=patch_dim, + resize_dim=resize_dim, + ) + dataset_val = Dataset( + labels_filepath=val_labels_filepath, + classes=classes, + annotation_format="coco", + root_dir=val_dir, + cache_dir=join(cache_dir, "val"), + cache_strategy=cache_strategy, + # Specified as augmentation because it's not guaranteed to target + # the correct instances + augmentation=get_validation_preprocessing(), + 
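+        # val_patch_strategy falls back to crop_all (a patch grid over the
+        # whole image) or resize when --val-patch-strategy is not set, so
+        # validation metrics see full images rather than a single crop per image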
patch_strategy=val_patch_strategy, + patch_dim=patch_dim, + resize_dim=resize_dim, + ) + + dataset_len = len(dataset) + dataset_val_len = len(dataset_val) + tot_training_batches = dataset_len // batch_size + tot_validation_batches = dataset_val_len // batch_size + + print( + f"Train dataset number of images: {dataset_len} | Batch size: {batch_size} | Expected number of batches: {tot_training_batches}" + ) + print( + f"Validation dataset number of images: {dataset_val_len} | Batch size: {batch_size} | Expected number of batches: {tot_validation_batches}" + ) + + num_classes: int = len(classes) + 1 # Plus 1 for background + + # define training and validation data loaders + # drop_last True to avoid single instances which throw an error on batch norm layers + + # Maxing the num_workers at 8 due to shared memory limitations + num_workers = min( + # Preferably use 2/3's of total cpus. If the cpu count is 1, it will be set to 0 which will result + # in dataloader using the main thread + int(round(multiprocessing.cpu_count() * 2 / 3)), + 8, + ) + + dataloader = DataLoader( + dataset, + batch_size=batch_size, + shuffle=True, + num_workers=num_workers, + drop_last=True, + pin_memory=True, + ) + dataloader_val = DataLoader( + dataset_val, + batch_size=batch_size, + shuffle=False, + num_workers=num_workers, + drop_last=True, + pin_memory=True, + ) + + # get the model using our helper function + if model_name == "fcn": + model = get_fcn_resnet50(num_classes, pretrained=pretrained) + elif model_name == "deeplab": + model = DeepLabModelWrapper( + num_classes, + pretrained=pretrained, + is_feature_extracting=pretrained, + ) # get_deeplabv3(num_classes, is_feature_extracting=pretrained) + else: + raise ValueError( + f'Provided model name "{model_name}" is not supported.' 
+ ) + + model = torch.nn.DataParallel(model) + # move model to the right device + model.to(device) + + # Create balanced cross entropy loss + if class_balance: + weights = semantic_segmentation_class_balancer(dataset) + weights = weights.to(device) + criterion = nn.CrossEntropyLoss(weight=weights) + else: + criterion = nn.CrossEntropyLoss() + criterion = criterion.to(device) + + params = [p for p in model.parameters() if p.requires_grad] + optimizer = torch.optim.SGD( + params, lr=learning_rate, momentum=0.9, weight_decay=0.0005 + ) + lr_scheduler = torch.optim.lr_scheduler.StepLR( + optimizer, step_size=3, gamma=0.1 + ) + + metrics = get_semantic_segmentation_metrics( + num_classes, thresholds=iou_thresholds + ) + metrics = metrics.to(device) + best_mean_iou = 0 + + best_model_wts: Dict = copy.deepcopy(model.state_dict()) + + scaler = torch.cuda.amp.GradScaler() + + for epoch in range(n_epochs): + start = time.time() + train_loss = 0 + val_loss = 0 + + # Switch to train mode for training + model.train() + + for batch_num, (images, targets) in enumerate(dataloader, 0): + batch_time = time.time() + images: torch.Tensor = images.to(device).float() + targets: torch.Tensor = targets.to(device).long() + + optimizer.zero_grad() + with torch.cuda.amp.autocast(): + out = model(images) + + outputs = out["out"] + loss = criterion(outputs, targets) + + # FCN Model w/o pre-training does not have an auxiliary loss component + # so we avoid this calculation + if not (not pretrained and model_name == "fcn"): + aux_outputs = out["aux"] + aux_loss = criterion(aux_outputs, targets) + loss = loss + aux_loss_weight * aux_loss + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + + preds = torch.argmax(outputs, dim=1) + + train_loss += loss.item() * images.size(0) + + metrics(preds, targets) + + print( + f"Train Epoch: {epoch} | Batch: {batch_num} | Batch Loss: {loss.item()} | Batch Time: {time.time() - batch_time}" + ) + + train_loss /= len(dataloader.dataset) + print(f"Epoch: {epoch} | Train Loss: {train_loss}") + + # Compute and log training metrics + results = metrics.compute() + results["loss"] = train_loss + log_metrics(results, classes, split="train") + metrics.reset() + + # Switch to eval mode for validation + model.eval() + + if epoch < n_epochs - 1 and batch_validation_perc < 1.0: + max_batch_num = int(tot_validation_batches * batch_validation_perc) + else: + max_batch_num = -1 + + with torch.no_grad(): + for batch_num, (images, targets) in enumerate(dataloader_val, 0): + if max_batch_num == -1 or batch_num < max_batch_num: + images: torch.Tensor = images.to(device).float() + targets: torch.Tensor = targets.to(device).long() + + with torch.cuda.amp.autocast(): + outputs = model(images)["out"] + loss = criterion(outputs, targets) + preds = torch.argmax(outputs, dim=1) + + val_loss += loss.item() * images.size(0) + metrics(preds, targets) + + print( + f"Validation Epoch: {epoch} | Batch {batch_num} | Batch Loss: {loss.item()}" + ) + + val_loss /= len(dataloader_val.dataset) + print(f"Epoch: {epoch} | Val Loss: {val_loss}") + + # Compute and log validation metrics + results = metrics.compute() + results["loss"] = val_loss + log_metrics(results, classes, split="val") + metrics.reset() + + mean_iou = float(results["mean_iou_0_5"]) + if mean_iou > best_mean_iou: + best_mean_iou = mean_iou + best_model_wts = copy.deepcopy(model.state_dict()) + torch.save( + model.state_dict(), + join(model_dir, f"{model_name}_checkpoint_{epoch}.pth"), + ) + + model.load_state_dict(best_model_wts) + 
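+    # Export the weights from the epoch with the best validation mean IoU
+    # (0.5 threshold) rather than the weights from the final epoch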
torch.save(model.state_dict(), join(model_dir, f"{model_name}_final.pth"))
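
A minimal sketch of how the exported checkpoint might be reloaded for inference. It assumes the default `deeplab` model, the default `--classes "1, 2"` (two foreground classes plus background), and a CPU target; the checkpoint path simply mirrors the `outputs/models/{model_name}_final.pth` pattern used in `train.py`, and `get_deeplabv3` is assumed to accept `pretrained=False`. This is illustrative, not part of the change above.

```python
from os.path import join

import torch

from train import DeepLabModelWrapper  # defined in train.py; heavy code is under __main__

# Default run: classes "1, 2" plus background => 3 output channels (assumption)
num_classes = 3
model = DeepLabModelWrapper(
    num_classes, pretrained=False, is_feature_extracting=False
)

# Training saved the state_dict of a DataParallel-wrapped model, so the keys
# carry a "module." prefix; wrap the same way before loading.
model = torch.nn.DataParallel(model)
state_dict = torch.load(
    join("outputs", "models", "deeplab_final.pth"), map_location="cpu"
)
model.load_state_dict(state_dict)
model.eval()
```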