Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…-recipes into staging
  • Loading branch information
seanytak committed Oct 12, 2021
2 parents 7bd9e3a + b2290ee commit 4520c65
Show file tree
Hide file tree
Showing 46 changed files with 1,843 additions and 310 deletions.
3 changes: 2 additions & 1 deletion contrib/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ Each project should live in its own subdirectory ```/contrib/<project>``` and co
|---|---|---|
| [Crowd counting](crowd_counting) | Counting the number of people in low-crowd-density (e.g. less than 10 people) and high-crowd-density (e.g. thousands of people) scenarios. | [![Build Status](https://dev.azure.com/team-sharat/crowd-counting/_apis/build/status/lixzhang.cnt?branchName=lixzhang%2Fsubmodule-rev3)](https://dev.azure.com/team-sharat/crowd-counting/_build/latest?definitionId=49&branchName=lixzhang%2Fsubmodule-rev3)|
| [Action Recognition with I3D](action_recognition) | Action recognition identifies which actions are performed in video/webcam footage (e.g. "running", "opening a bottle") and their respective start/end times. Please note that we also have an R(2+1)D implementation of action recognition, which you can find under [scenarios](../scenarios).| |
| [Document Image Binarization](binarization) | Binarization is a technique to segment foreground from the background pixels. A simple technique for binarization is thresholding of gray-level or color document scanned images.| |
| [Document Image Cleanup](document_cleanup) | Given an input noisy document image, the aim of document image cleanup is to improve its readability and visibility by removing the noisy elements.| |


## Tools
| Directory | Project description | Build status (optional) |
Expand Down
17 changes: 0 additions & 17 deletions contrib/binarization/README.md

This file was deleted.

This file was deleted.

16 changes: 16 additions & 0 deletions contrib/document_cleanup/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Document Image Cleanup
Given an input noisy document image, the aim of document image cleanup is to improve its readability and visibility by removing the noisy elements.

## Example of document image cleanup

### Noisy input images

<img src="./light_weight_document_cleanup_ICDAR2021/sample_input_output/book_org.jpg" width="33%"> </img>
<img src="./light_weight_document_cleanup_ICDAR2021/sample_input_output/writing_org.jpg" width="33%"> </img>
<img src="./confidence_based_Sauvola_binarization/test_images/2.jpeg" width="33%"> </img>

### Cleaned-up output images

<img src="./light_weight_document_cleanup_ICDAR2021/sample_input_output/book_dnn.jpg" width="33%"> </img>
<img src="./light_weight_document_cleanup_ICDAR2021/sample_input_output/writing_dnn.jpg" width="33%"> </img>
<img src="./confidence_based_Sauvola_binarization/results/2_bin_new.png" width="33%"> </img>

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import cv2
import os
import random
import numpy as np
from random import randint
import albumentations as A
import numpy as np
import cv2
import sys
import os
from tqdm import tqdm
from utils import GetOverlappingBlocks, getListOfFiles, ImageResize



# Augmentation pipeline used on the randomly cropped input blocks (the
# ground-truth crops are written unmodified). It is built from three
# independent stages; each stage is an albumentations ``OneOf`` group, i.e.
# when the stage fires it applies a single member transform.

# Stage 1: sensor noise or JPEG compression artefacts.
_noise_stage = A.OneOf(
    [
        A.ISONoise(p=0.4),
        A.JpegCompression(
            quality_lower=50,
            quality_upper=70,
            always_apply=False,
            p=0.8,
        ),
    ],
    p=0.6,
)

# Stage 2: one kind of blur (motion, median or Gaussian).
_blur_stage = A.OneOf(
    [
        A.MotionBlur(blur_limit=10, p=.8),
        A.MedianBlur(blur_limit=3, p=0.75),
        A.GaussianBlur(blur_limit=7, p=0.75),
    ],
    p=0.8,
)

# Stage 3: illumination change (brightness/contrast jitter or cast shadows).
_lighting_stage = A.OneOf(
    [
        A.RandomBrightnessContrast(
            brightness_limit=0.3,
            contrast_limit=0.3,
            p=0.75,
        ),
        A.RandomShadow(
            num_shadows_lower=1,
            num_shadows_upper=18,
            shadow_dimension=6,
            p=0.85,
        ),
    ],
    p=0.8,
)

transform = A.Compose([_noise_stage, _blur_stage, _lighting_stage])



def _save_block_pair(names_file, train_path, index, in_block, gt_block):
    """Write one input/ground-truth block pair to disk and record its name.

    The input block is saved as ``block_<index>.png`` and the ground-truth
    block as ``gtblock_<index>.png`` inside ``train_path``; only the input
    block's file name is appended (one per line) to ``names_file``.
    """
    block_name = 'block_' + str(index) + '.png'
    cv2.imwrite(train_path + '/' + block_name, in_block)
    cv2.imwrite(train_path + '/gtblock_' + str(index) + '.png', gt_block)
    names_file.write(block_name + '\n')


def _augment_bgr_block(block):
    """Run the module-level ``transform`` pipeline on a BGR image block.

    The block is converted BGR -> RGB before augmentation and back to BGR
    afterwards so that it can be written with ``cv2.imwrite``.
    """
    rgb_block = cv2.cvtColor(block, cv2.COLOR_BGR2RGB)
    augmented = transform(image=rgb_block)['image']
    return cv2.cvtColor(augmented, cv2.COLOR_RGB2BGR)


def GenerateTrainingBlocks(data_folder, gt_folder, dataset_path='./dataset', M=256, N=256):
    """Cut paired noisy/ground-truth images into training blocks on disk.

    For every file listed by ``getListOfFiles`` for the input folder, the
    input image and the ground-truth image of the same name are loaded and
    processed at three scales (0.7, 1.0, 1.4). At each scale:

    * every block returned by ``GetOverlappingBlocks(..., Part=8)`` is saved
      unchanged, and
    * ``len(blocks) // 5`` extra random ``M`` x ``N`` crops are saved, with
      the input crop passed through the augmentation pipeline and the
      ground-truth crop left untouched. Images that are not more than one
      pixel larger than the crop in both dimensions get no random crops.

    Args:
        data_folder: Name of the sub-folder of ``dataset_path`` holding the
            input images. Also used to name the output folder.
        gt_folder: Name of the sub-folder of ``dataset_path`` holding the
            ground-truth images; each must share its input image's name.
        dataset_path: Root folder containing both sub-folders.
        M: Height (rows) of the random crops.
        N: Width (columns) of the random crops.

    Returns:
        A tuple ``(train_path, train_filenames)``: the output folder
        ``<dataset_path>/<data_folder>_Trainblocks`` and the path of the
        text file inside it listing the saved input block names.
    """
    print(data_folder)
    print('Generating training blocks!!!')
    train_path = dataset_path + '/' + data_folder + '_Trainblocks'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(train_path, exist_ok=True)

    train_filenames = train_path + '/train_block_names.txt'
    data_path = dataset_path + '/' + data_folder
    gt_path = dataset_path + '/' + gt_folder

    print(data_path)

    # NOTE(review): assumes getListOfFiles returns names relative to
    # data_path, since they are re-joined with both folders below - confirm.
    filenames = getListOfFiles(data_path)
    cnt = 0
    print(filenames)

    # The context manager guarantees the names file is flushed and closed
    # even if reading or writing an image raises part-way through.
    with open(train_filenames, 'w') as f:
        for name in tqdm(filenames):
            print(name)
            gt_filename = gt_path + '/' + name
            in_filename = data_path + '/' + name
            print(gt_filename)
            print(in_filename)
            gt_image_initial = cv2.imread(gt_filename)
            in_image_initial = cv2.imread(in_filename)
            # cv2.imread signals failure by returning None instead of raising;
            # skip the pair with a clear message rather than failing later
            # with an opaque AttributeError on ``.shape``.
            if gt_image_initial is None or in_image_initial is None:
                print('Skipping ' + str(name) + ': could not read input or ground-truth image')
                continue
            print(gt_image_initial.shape, in_image_initial.shape)

            for scale in (0.7, 1.0, 1.4):
                gt_image = ImageResize(gt_image_initial, scale)
                in_image = ImageResize(in_image_initial, scale)
                gt_img = GetOverlappingBlocks(gt_image, Part=8)
                in_img = GetOverlappingBlocks(in_image, Part=8)

                # Regular grid of overlapping blocks, saved without augmentation.
                for in_block, gt_block in zip(in_img, gt_img):
                    _save_block_pair(f, train_path, cnt, in_block, gt_block)
                    cnt += 1

                # Random crops need the image to exceed the crop size by more
                # than one pixel in both dimensions; the size does not change
                # between crops, so test it once instead of per iteration.
                max_y = in_image.shape[0] - M
                max_x = in_image.shape[1] - N
                if max_y <= 1 or max_x <= 1:
                    continue

                Random_Block_Number_PerImage = int(len(gt_img) / 5)
                for _ in range(Random_Block_Number_PerImage):
                    y = random.randint(1, max_y)
                    x = random.randint(1, max_x)
                    in_part_img = in_image[y:y + M, x:x + N, :].copy()
                    gt_part_img = gt_image[y:y + M, x:x + N, :].copy()
                    # Only the input crop is degraded; the target stays clean.
                    augmented_image = _augment_bgr_block(in_part_img)
                    _save_block_pair(f, train_path, cnt, augmented_image, gt_part_img)
                    cnt += 1

    print('Total number of training blocks generated: ', cnt)

    return train_path, train_filenames


Loading

0 comments on commit 4520c65

Please sign in to comment.