mirror of
https://github.com/ciromattia/kcc
synced 2025-12-12 17:26:23 +00:00
[A new image cropping algorithm]
1. Replaced both crop margins and crop margins & page num with newer algorithm. 2. Crop max power level increased to 3.0 3. Adds NumPy as a new dependency.
This commit is contained in:
16
gui/KCC.ui
16
gui/KCC.ui
@@ -6,7 +6,7 @@
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>450</width>
|
||||
<width>481</width>
|
||||
<height>400</height>
|
||||
</rect>
|
||||
</property>
|
||||
@@ -242,7 +242,7 @@
|
||||
<number>5</number>
|
||||
</property>
|
||||
<property name="orientation">
|
||||
<enum>Qt::Horizontal</enum>
|
||||
<enum>Qt::Orientation::Horizontal</enum>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
@@ -277,13 +277,13 @@
|
||||
<item>
|
||||
<widget class="QSlider" name="croppingPowerSlider">
|
||||
<property name="maximum">
|
||||
<number>200</number>
|
||||
<number>300</number>
|
||||
</property>
|
||||
<property name="singleStep">
|
||||
<number>1</number>
|
||||
</property>
|
||||
<property name="orientation">
|
||||
<enum>Qt::Horizontal</enum>
|
||||
<enum>Qt::Orientation::Horizontal</enum>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
@@ -489,13 +489,13 @@
|
||||
<string notr="true">QListWidget#jobList {background:#ffffff;background-image:url(:/Other/icons/list_background.png);background-position:center center;background-repeat:no-repeat;color:rgb(0,0,0);}</string>
|
||||
</property>
|
||||
<property name="selectionMode">
|
||||
<enum>QAbstractItemView::NoSelection</enum>
|
||||
<enum>QAbstractItemView::SelectionMode::NoSelection</enum>
|
||||
</property>
|
||||
<property name="verticalScrollMode">
|
||||
<enum>QAbstractItemView::ScrollPerPixel</enum>
|
||||
<enum>QAbstractItemView::ScrollMode::ScrollPerPixel</enum>
|
||||
</property>
|
||||
<property name="horizontalScrollMode">
|
||||
<enum>QAbstractItemView::ScrollPerPixel</enum>
|
||||
<enum>QAbstractItemView::ScrollMode::ScrollPerPixel</enum>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
@@ -516,7 +516,7 @@
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
<property name="alignment">
|
||||
<set>Qt::AlignJustify|Qt::AlignVCenter</set>
|
||||
<set>Qt::AlignmentFlag::AlignJustify|Qt::AlignmentFlag::AlignVCenter</set>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Resource object code (Python 3)
|
||||
# Created by: object code
|
||||
# Created by: The Resource Compiler for Qt version 6.5.2
|
||||
# Created by: The Resource Compiler for Qt version 6.6.3
|
||||
# WARNING! All changes made in this file will be lost!
|
||||
|
||||
from PySide6 import QtCore
|
||||
@@ -11476,49 +11476,49 @@ qt_resource_struct = b"\
|
||||
\x00\x00\x00X\x00\x02\x00\x00\x00\x04\x00\x00\x00\x07\
|
||||
\x00\x00\x00\x00\x00\x00\x00\x00\
|
||||
\x00\x00\x01\xac\x00\x00\x00\x00\x00\x01\x00\x02&\xd7\
|
||||
\x00\x00\x01\x88;p\xbcB\
|
||||
\x00\x00\x01\x90(\xef\xc4\x03\
|
||||
\x00\x00\x01\xea\x00\x00\x00\x00\x00\x01\x00\x02{q\
|
||||
\x00\x00\x01\x88;p\xbcB\
|
||||
\x00\x00\x01\x90(\xef\xc4\x00\
|
||||
\x00\x00\x01\xd6\x00\x00\x00\x00\x00\x01\x00\x02Qv\
|
||||
\x00\x00\x01\x88;p\xbcB\
|
||||
\x00\x00\x01\x90(\xef\xc3\xff\
|
||||
\x00\x00\x01\xc2\x00\x00\x00\x00\x00\x01\x00\x02F\x13\
|
||||
\x00\x00\x01\x89\x89D9.\
|
||||
\x00\x00\x01\x90(\xef\xc4\x01\
|
||||
\x00\x00\x00X\x00\x02\x00\x00\x00\x03\x00\x00\x00\x0c\
|
||||
\x00\x00\x00\x00\x00\x00\x00\x00\
|
||||
\x00\x00\x00\xa6\x00\x00\x00\x00\x00\x01\x00\x01(\x97\
|
||||
\x00\x00\x01\x88;p\xbcB\
|
||||
\x00\x00\x01\x90(\xef\xc4\x03\
|
||||
\x00\x00\x00\x8c\x00\x00\x00\x00\x00\x01\x00\x01\x1d\x90\
|
||||
\x00\x00\x01\x88;p\xbcB\
|
||||
\x00\x00\x01\x90(\xef\xc4\x02\
|
||||
\x00\x00\x00\xbc\x00\x00\x00\x00\x00\x01\x00\x011\xef\
|
||||
\x00\x00\x01\x88;p\xbcB\
|
||||
\x00\x00\x01\x90(\xef\xc4\x04\
|
||||
\x00\x00\x00X\x00\x02\x00\x00\x00\x03\x00\x00\x00\x10\
|
||||
\x00\x00\x00\x00\x00\x00\x00\x00\
|
||||
\x00\x00\x02.\x00\x00\x00\x00\x00\x01\x00\x02\xad\xbd\
|
||||
\x00\x00\x01\x88;p\xbcJ\
|
||||
\x00\x00\x01\x90(\xef\xc4!\
|
||||
\x00\x00\x02\x00\x00\x00\x00\x00\x00\x01\x00\x02\x97\xc0\
|
||||
\x00\x00\x01\x88;p\xbcI\
|
||||
\x00\x00\x01\x90(\xef\xc4\x1d\
|
||||
\x00\x00\x02\x16\x00\x00\x00\x00\x00\x01\x00\x02\xa1\x1d\
|
||||
\x00\x00\x01\x88;p\xbcI\
|
||||
\x00\x00\x01\x90(\xef\xc4\x19\
|
||||
\x00\x00\x00X\x00\x02\x00\x00\x00\x07\x00\x00\x00\x14\
|
||||
\x00\x00\x00\x00\x00\x00\x00\x00\
|
||||
\x00\x00\x01\x08\x00\x00\x00\x00\x00\x01\x00\x01H\x9b\
|
||||
\x00\x00\x01\x88;p\xbcJ\
|
||||
\x00\x00\x01\x90(\xef\xc4\x22\
|
||||
\x00\x00\x01\x1e\x00\x00\x00\x00\x00\x01\x00\x01qC\
|
||||
\x00\x00\x01\x88;p\xbcI\
|
||||
\x00\x00\x01\x90(\xef\xc4\x1c\
|
||||
\x00\x00\x01\x80\x00\x00\x00\x00\x00\x01\x00\x01\xca\x17\
|
||||
\x00\x00\x01\x88;p\xbcI\
|
||||
\x00\x00\x01\x90(\xef\xc4\x1e\
|
||||
\x00\x00\x01f\x00\x00\x00\x00\x00\x01\x00\x01\x84\xd0\
|
||||
\x00\x00\x01\x88;p\xbcH\
|
||||
\x00\x00\x01\x90(\xef\xc4\x18\
|
||||
\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x01\x00\x01D<\
|
||||
\x00\x00\x01\x88;p\xbcF\
|
||||
\x00\x00\x01\x90(\xef\xc4\x0e\
|
||||
\x00\x00\x00\xd4\x00\x00\x00\x00\x00\x01\x00\x017\xd3\
|
||||
\x00\x00\x01\x88;p\xbcH\
|
||||
\x00\x00\x01\x90(\xef\xc4\x17\
|
||||
\x00\x00\x01@\x00\x00\x00\x00\x00\x01\x00\x01z\x9a\
|
||||
\x00\x00\x01\x88;p\xbcH\
|
||||
\x00\x00\x01\x90(\xef\xc4\x18\
|
||||
\x00\x00\x00X\x00\x02\x00\x00\x00\x01\x00\x00\x00\x1c\
|
||||
\x00\x00\x00\x00\x00\x00\x00\x00\
|
||||
\x00\x00\x00h\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\
|
||||
\x00\x00\x01\x88;p\xbcH\
|
||||
\x00\x00\x01\x90(\xef\xc4\x16\
|
||||
"
|
||||
|
||||
def qInitResources():
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
################################################################################
|
||||
## Form generated from reading UI file 'KCC.ui'
|
||||
##
|
||||
## Created by: Qt User Interface Compiler version 6.5.2
|
||||
## Created by: Qt User Interface Compiler version 6.6.3
|
||||
##
|
||||
## WARNING! All changes made in this file will be lost when recompiling UI file!
|
||||
################################################################################
|
||||
@@ -26,7 +26,7 @@ class Ui_mainWindow(object):
|
||||
def setupUi(self, mainWindow):
|
||||
if not mainWindow.objectName():
|
||||
mainWindow.setObjectName(u"mainWindow")
|
||||
mainWindow.resize(450, 400)
|
||||
mainWindow.resize(481, 400)
|
||||
icon = QIcon()
|
||||
icon.addFile(u":/Icon/icons/comic2ebook.png", QSize(), QIcon.Normal, QIcon.Off)
|
||||
mainWindow.setWindowIcon(icon)
|
||||
@@ -139,7 +139,7 @@ class Ui_mainWindow(object):
|
||||
self.gammaSlider.setObjectName(u"gammaSlider")
|
||||
self.gammaSlider.setMaximum(250)
|
||||
self.gammaSlider.setSingleStep(5)
|
||||
self.gammaSlider.setOrientation(Qt.Horizontal)
|
||||
self.gammaSlider.setOrientation(Qt.Orientation.Horizontal)
|
||||
|
||||
self.horizontalLayout_2.addWidget(self.gammaSlider)
|
||||
|
||||
@@ -159,9 +159,9 @@ class Ui_mainWindow(object):
|
||||
|
||||
self.croppingPowerSlider = QSlider(self.croppingWidget)
|
||||
self.croppingPowerSlider.setObjectName(u"croppingPowerSlider")
|
||||
self.croppingPowerSlider.setMaximum(200)
|
||||
self.croppingPowerSlider.setMaximum(300)
|
||||
self.croppingPowerSlider.setSingleStep(1)
|
||||
self.croppingPowerSlider.setOrientation(Qt.Horizontal)
|
||||
self.croppingPowerSlider.setOrientation(Qt.Orientation.Horizontal)
|
||||
|
||||
self.horizontalLayout_3.addWidget(self.croppingPowerSlider)
|
||||
|
||||
@@ -170,7 +170,7 @@ class Ui_mainWindow(object):
|
||||
|
||||
self.buttonWidget = QWidget(self.centralWidget)
|
||||
self.buttonWidget.setObjectName(u"buttonWidget")
|
||||
sizePolicy = QSizePolicy(QSizePolicy.Preferred, QSizePolicy.Fixed)
|
||||
sizePolicy = QSizePolicy(QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Fixed)
|
||||
sizePolicy.setHorizontalStretch(0)
|
||||
sizePolicy.setVerticalStretch(0)
|
||||
sizePolicy.setHeightForWidth(self.buttonWidget.sizePolicy().hasHeightForWidth())
|
||||
@@ -267,9 +267,9 @@ class Ui_mainWindow(object):
|
||||
self.jobList = QListWidget(self.centralWidget)
|
||||
self.jobList.setObjectName(u"jobList")
|
||||
self.jobList.setStyleSheet(u"QListWidget#jobList {background:#ffffff;background-image:url(:/Other/icons/list_background.png);background-position:center center;background-repeat:no-repeat;color:rgb(0,0,0);}")
|
||||
self.jobList.setSelectionMode(QAbstractItemView.NoSelection)
|
||||
self.jobList.setVerticalScrollMode(QAbstractItemView.ScrollPerPixel)
|
||||
self.jobList.setHorizontalScrollMode(QAbstractItemView.ScrollPerPixel)
|
||||
self.jobList.setSelectionMode(QAbstractItemView.SelectionMode.NoSelection)
|
||||
self.jobList.setVerticalScrollMode(QAbstractItemView.ScrollMode.ScrollPerPixel)
|
||||
self.jobList.setHorizontalScrollMode(QAbstractItemView.ScrollMode.ScrollPerPixel)
|
||||
|
||||
self.gridLayout.addWidget(self.jobList, 2, 0, 1, 2)
|
||||
|
||||
@@ -278,7 +278,7 @@ class Ui_mainWindow(object):
|
||||
self.progressBar.setMinimumSize(QSize(0, 30))
|
||||
self.progressBar.setFont(font)
|
||||
self.progressBar.setVisible(False)
|
||||
self.progressBar.setAlignment(Qt.AlignJustify|Qt.AlignVCenter)
|
||||
self.progressBar.setAlignment(Qt.AlignmentFlag.AlignJustify|Qt.AlignmentFlag.AlignVCenter)
|
||||
|
||||
self.gridLayout.addWidget(self.progressBar, 1, 0, 1, 2)
|
||||
|
||||
@@ -290,7 +290,7 @@ class Ui_mainWindow(object):
|
||||
self.gridLayout_3.setContentsMargins(0, 0, 0, 0)
|
||||
self.hLabel = QLabel(self.customWidget)
|
||||
self.hLabel.setObjectName(u"hLabel")
|
||||
sizePolicy1 = QSizePolicy(QSizePolicy.Fixed, QSizePolicy.Preferred)
|
||||
sizePolicy1 = QSizePolicy(QSizePolicy.Policy.Fixed, QSizePolicy.Policy.Preferred)
|
||||
sizePolicy1.setHorizontalStretch(0)
|
||||
sizePolicy1.setVerticalStretch(0)
|
||||
sizePolicy1.setHeightForWidth(self.hLabel.sizePolicy().hasHeightForWidth())
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
################################################################################
|
||||
## Form generated from reading UI file 'MetaEditor.ui'
|
||||
##
|
||||
## Created by: Qt User Interface Compiler version 6.5.2
|
||||
## Created by: Qt User Interface Compiler version 6.6.3
|
||||
##
|
||||
## WARNING! All changes made in this file will be lost when recompiling UI file!
|
||||
################################################################################
|
||||
@@ -117,7 +117,7 @@ class Ui_editorDialog(object):
|
||||
self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
|
||||
self.statusLabel = QLabel(self.optionWidget)
|
||||
self.statusLabel.setObjectName(u"statusLabel")
|
||||
sizePolicy = QSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.MinimumExpanding)
|
||||
sizePolicy = QSizePolicy(QSizePolicy.Policy.MinimumExpanding, QSizePolicy.Policy.MinimumExpanding)
|
||||
sizePolicy.setHorizontalStretch(0)
|
||||
sizePolicy.setVerticalStretch(0)
|
||||
sizePolicy.setHeightForWidth(self.statusLabel.sizePolicy().hasHeightForWidth())
|
||||
|
||||
@@ -23,6 +23,7 @@ import os
|
||||
import mozjpeg_lossless_optimization
|
||||
from PIL import Image, ImageOps, ImageStat, ImageChops, ImageFilter
|
||||
from .shared import md5Checksum
|
||||
from .page_number_crop_alg import get_bbox_crop_margin_page_number, get_bbox_crop_margin
|
||||
|
||||
AUTO_CROP_THRESHOLD = 0.015
|
||||
|
||||
@@ -358,20 +359,6 @@ class ComicPage:
|
||||
else:
|
||||
return Image.Resampling.LANCZOS
|
||||
|
||||
def getBoundingBox(self, tmptmg):
|
||||
min_margin = [int(0.005 * i + 0.5) for i in tmptmg.size]
|
||||
max_margin = [int(0.1 * i + 0.5) for i in tmptmg.size]
|
||||
bbox = tmptmg.getbbox()
|
||||
bbox = (
|
||||
max(0, min(max_margin[0], bbox[0] - min_margin[0])),
|
||||
max(0, min(max_margin[1], bbox[1] - min_margin[1])),
|
||||
min(tmptmg.size[0],
|
||||
max(tmptmg.size[0] - max_margin[0], bbox[2] + min_margin[0])),
|
||||
min(tmptmg.size[1],
|
||||
max(tmptmg.size[1] - max_margin[1], bbox[3] + min_margin[1])),
|
||||
)
|
||||
return bbox
|
||||
|
||||
def maybeCrop(self, box, minimum):
|
||||
box_area = (box[2] - box[0]) * (box[3] - box[1])
|
||||
image_area = self.image.size[0] * self.image.size[1]
|
||||
@@ -379,26 +366,16 @@ class ComicPage:
|
||||
self.image = self.image.crop(box)
|
||||
|
||||
def cropPageNumber(self, power, minimum):
|
||||
if self.fill != 'white':
|
||||
tmptmg = self.image.convert(mode='L')
|
||||
else:
|
||||
tmptmg = ImageOps.invert(self.image.convert(mode='L'))
|
||||
tmptmg = tmptmg.point(lambda x: x and 255)
|
||||
tmptmg = tmptmg.filter(ImageFilter.MinFilter(size=3))
|
||||
tmptmg = tmptmg.filter(ImageFilter.GaussianBlur(radius=5))
|
||||
tmptmg = tmptmg.point(lambda x: (x >= 16 * power) and x)
|
||||
if tmptmg.getbbox():
|
||||
self.maybeCrop(tmptmg.getbbox(), minimum)
|
||||
bbox = get_bbox_crop_margin_page_number(self.image, power, self.fill)
|
||||
|
||||
if bbox:
|
||||
self.maybeCrop(bbox, minimum)
|
||||
|
||||
def cropMargin(self, power, minimum):
|
||||
if self.fill != 'white':
|
||||
tmptmg = self.image.convert(mode='L')
|
||||
else:
|
||||
tmptmg = ImageOps.invert(self.image.convert(mode='L'))
|
||||
tmptmg = tmptmg.filter(ImageFilter.GaussianBlur(radius=3))
|
||||
tmptmg = tmptmg.point(lambda x: (x >= 16 * power) and x)
|
||||
if tmptmg.getbbox():
|
||||
self.maybeCrop(self.getBoundingBox(tmptmg), minimum)
|
||||
bbox = get_bbox_crop_margin(self.image, power, self.fill)
|
||||
|
||||
if bbox:
|
||||
self.maybeCrop(bbox, minimum)
|
||||
|
||||
|
||||
class Cover:
|
||||
|
||||
210
kindlecomicconverter/page_number_crop_alg.py
Normal file
210
kindlecomicconverter/page_number_crop_alg.py
Normal file
@@ -0,0 +1,210 @@
|
||||
from PIL import ImageOps, ImageFilter
|
||||
import numpy as np
|
||||
|
||||
'''
|
||||
Some assumptions about the page number sizes:
|
||||
We assume that the size of the number (including all digits) is between
|
||||
'min_shape_size_tolerated_size' and 'max_shape_size_tolerated_size' relative to the image size.
|
||||
We assume the distance between digits is no more than 'max_dist_size' (x,y), and that a page number has no more than 3 digits.
|
||||
'''
|
||||
# NOTE: every value below is a FRACTION of the image size (x: width, y: height),
# e.g. 0.02 means 2% of the corresponding dimension.

# Largest footprint accepted for a whole page number: up to three digits of
# 1.5% of the width each, and 2% of the height.
max_shape_size_tolerated_size = (3 * 0.015, 0.02)

# Smallest footprint a detected object must have to be kept at all.
min_shape_size_tolerated_size = (0.003, 0.006)

# Height of the strip inspected at the bottom of the content: the maximum
# page-number height plus 25% slack.
window_h_size = 1.25 * max_shape_size_tolerated_size[1]

# Largest (x, y) gap at which dark pixels / boxes are still considered related.
max_dist_size = (0.01, 0.002)
|
||||
|
||||
|
||||
'''
|
||||
E-reader screen real-estate is an important resource.
|
||||
More available screensize means more details can be better seen, especially text.
|
||||
Text is one of the most important elements that need to be clearly readable on e-readers,
|
||||
which mostly are smaller devices where the need to zoom is unwanted.
|
||||
|
||||
By cropping the page number on the bottom of the page, 2%-5% of the page height can be regained
|
||||
that allows us to upscale the image even more.
|
||||
- Most of the times the screen height is the limiting factor in upscaling, rather than its width.
|
||||
|
||||
Parameters:
|
||||
img (PIL image): A PIL image.
|
||||
power (float): The power to 'chop' through pixels matching the background. Values in range[0,3].
|
||||
background_color (string): 'white' for white background, anything else for black.
|
||||
Returns:
|
||||
bbox (4-tuple, left|top|right|bot): The tightest bounding box calculated after trying to remove the bottom page number. Returns None if it couldn't find anything satisfactory.
|
||||
'''
|
||||
def get_bbox_crop_margin_page_number(img, power=1, background_color='white'):
    """Compute a crop box that trims the margins and, when detected, a bottom page number.

    Parameters:
        img (PIL image): Page to analyse. Converted to grayscale when its mode is not 'L'.
        power (float): Cropping strength, translated to a pixel threshold by
            threshold_from_power(). Higher values crop through darker pixels.
        background_color (string): 'white' for a white background; for any other
            value the image is inverted before processing.
    Returns:
        bbox (4-tuple, left|top|right|bot), or None when no pixel of the page is
        at or below the threshold (nothing to crop around).
    """
    if img.mode != 'L':
        img = ImageOps.grayscale(img)

    if background_color != 'white':
        img = ImageOps.invert(img)

    # Autocontrast: the thresholds below rely on blacks being black and whites being white.
    # BoxBlur (mean filter): reduces noise such as a bad page scan or compression artifacts.
    # Note from the original author: MedianFilter gives better results but takes 2x-3x longer.
    img = ImageOps.autocontrast(img, 1).filter(ImageFilter.BoxBlur(1))

    # The higher the power, the lower the threshold, i.e. the darker a pixel has
    # to be before it counts as content (white background case).
    threshold = threshold_from_power(power)
    bw_img = img.point(lambda p: 255 if p <= threshold else 0)

    content_bbox = bw_img.getbbox()
    if content_bbox is None:
        # Blank page: no content pixel at all. Unpacking None used to raise a
        # TypeError here; report "nothing found" as the contract promises.
        return None
    left, _, right, bot_y_pos = content_bbox

    width, height = img.size
    max_dist = (width * max_dist_size[0], height * max_dist_size[1])

    # Inspect only the strip just above the lowest content row, where a page
    # number (1 to 3 digits, sized between 'min_shape_size_tolerated_size' and
    # 'max_shape_size_tolerated_size') is expected to sit.
    window_h = int(height * window_h_size)
    img_part = img.crop((left, bot_y_pos - window_h, right, bot_y_pos))

    # Group dark pixels by proximity within each row (x axis); every group
    # becomes a one-row box (left, right, top, bot) in window coordinates.
    img_part_mat = np.array(img_part)
    window_groups = []
    for y in range(img_part.size[1]):
        window_groups.extend(
            (start, end, y, y)
            for start, end in group_pixels(img_part_mat[y], max_dist[0], threshold)
        )
    window_groups = np.array(window_groups)

    # Merge the one-row boxes with close neighbours to obtain the bounding
    # boxes of the detected objects (one of them could be the page number).
    boxes = merge_boxes(window_groups, max_dist)

    # Drop objects that are too small to be a page number.
    min_w = width * min_shape_size_tolerated_size[0]
    min_h = height * min_shape_size_tolerated_size[1]
    boxes = [box for box in boxes
             if box[1] - box[0] >= min_w and box[3] - box[2] >= min_h]

    # The lowest dark object is assumed to be the page number. If more than one
    # object shares its vertical range, one of them is probably manga content
    # and nothing must be cropped.
    lowest_boxes = [box for box in boxes if box[3] == window_h - 1]
    min_y_of_lowest_boxes = min((box[2] for box in lowest_boxes), default=0)
    boxes_in_same_y_range = [box for box in boxes if box[3] >= min_y_of_lowest_boxes]

    max_w = width * max_shape_size_tolerated_size[0]
    max_h = max(height * max_shape_size_tolerated_size[1], 3)

    cropped_bbox = (0, 0, width, height)
    if len(boxes_in_same_y_range) == 1:
        candidate = boxes_in_same_y_range[0]
        if candidate[1] - candidate[0] <= max_w and candidate[3] - candidate[2] <= max_h:
            # Cut the page just above the candidate's top row; candidate[2] is
            # relative to the window, whose top is at bot_y_pos - window_h.
            cropped_bbox = (0, 0, width, bot_y_pos - (window_h - candidate[2] + 1))

    # Tighten around whatever content remains inside the (possibly shortened) page.
    return bw_img.crop(cropped_bbox).getbbox()
|
||||
|
||||
|
||||
'''
|
||||
Parameters:
|
||||
img (PIL image): A PIL image.
|
||||
power (float): The power to 'chop' through pixels matching the background. Values in range[0,3].
|
||||
background_color (string): 'white' for white background, anything else for black.
|
||||
Returns:
|
||||
bbox (4-tuple, left|top|right|bot): The tightest bounding box around the page content once the background margins are removed. Returns None if no content is found.
|
||||
'''
|
||||
def get_bbox_crop_margin(img, power=1, background_color='white'):
    """Return the bounding box of all pixels at or below the crop threshold.

    Parameters:
        img (PIL image): Page to analyse. Converted to grayscale when its mode is not 'L'.
        power (float): Cropping strength, translated to a pixel threshold by
            threshold_from_power().
        background_color (string): 'white' for a white background; for any other
            value the image is inverted before processing.
    Returns:
        Whatever getbbox() yields for the thresholded image: a
        (left, top, right, bot) tuple, or None for an empty result.
    """
    gray = img if img.mode == 'L' else ImageOps.grayscale(img)
    if background_color != 'white':
        gray = ImageOps.invert(gray)

    # Stretch the contrast first so that the threshold sees real blacks and
    # whites, then apply a mean filter to soften scan noise and compression
    # artifacts (a median filter works better but is 2x-3x slower).
    stretched = ImageOps.autocontrast(gray, 1)
    smoothed = stretched.filter(ImageFilter.BoxBlur(1))

    # Higher power -> lower cutoff -> only darker pixels count as content.
    cutoff = threshold_from_power(power)

    def mark_content(pixel):
        return 255 if pixel <= cutoff else 0

    return smoothed.point(mark_content).getbbox()
|
||||
|
||||
|
||||
'''
|
||||
Groups close pixels together (x axis)
|
||||
'''
|
||||
def group_pixels(row, max_dist_tolerated, threshold):
    """Group the dark pixels of one image row into runs of nearby pixels.

    A pixel is "dark" when its value is <= threshold. Consecutive dark pixels
    belong to the same group as long as the distance between them does not
    exceed max_dist_tolerated.

    Parameters:
        row (1-D numpy array): Pixel values of a single image row.
        max_dist_tolerated (number): Largest index gap allowed inside a group.
        threshold (number): Highest pixel value still considered dark.
    Returns:
        list of (start, end) index pairs, both inclusive, ordered left to right.
    """
    idx = np.flatnonzero(row <= threshold)
    if idx.size == 0:
        return []

    # Positions k where the gap between idx[k] and idx[k + 1] is too wide:
    # idx[k] closes one group and idx[k + 1] opens the next one. The previous
    # loop dropped idx[k + 1], so groups started one pixel late and a trailing
    # isolated pixel vanished.
    breaks = np.flatnonzero(np.diff(idx) > max_dist_tolerated)
    starts = idx[np.concatenate(([0], breaks + 1))]
    ends = idx[np.concatenate((breaks, [idx.size - 1]))]

    return list(zip(starts, ends))
|
||||
|
||||
|
||||
def box_intersect(box1, box2, max_dist):
    """Tell whether two boxes overlap once box2 is grown by max_dist.

    Boxes are indexed as (left, right, top, bot); max_dist is (x, y).
    Returns True unless box2, expanded by the tolerance, lies entirely to
    one side of box1.
    """
    x_tol = max_dist[0]
    y_tol = max_dist[1]

    # box2 is completely to the right of box1
    if box2[0] - x_tol > box1[1]:
        return False
    # box2 is completely to the left of box1
    if box2[1] + x_tol < box1[0]:
        return False
    # box2 is completely below box1
    if box2[2] - y_tol > box1[3]:
        return False
    # box2 is completely above box1
    if box2[3] + y_tol < box1[2]:
        return False
    return True
|
||||
|
||||
'''
|
||||
Merge close bounding boxes (left,right, top,bot) (x axis) with distance threshold defined in
|
||||
'max_dist_tolerated'. Boxes that are less than 'max_dist_tolerated' apart (Chebyshev distance) are merged into one.
|
||||
'''
|
||||
def merge_boxes(boxes, max_dist_tolerated):
    """Repeatedly fuse boxes that lie within max_dist_tolerated of each other.

    Parameters:
        boxes (2-D numpy array): One (left, right, top, bot) box per row.
        max_dist_tolerated ((x, y) pair): Tolerance handed to box_intersect().
    Returns:
        The array of boxes left once no remaining pair intersects.
    """
    anchor_idx = 0
    while anchor_idx < len(boxes) - 1:
        anchor = boxes[anchor_idx]

        # Split every later box into "touches the anchor" / "does not".
        touching, separate = [], []
        for candidate in boxes[anchor_idx + 1:]:
            if box_intersect(anchor, candidate, max_dist_tolerated):
                touching.append(candidate)
            else:
                separate.append(candidate)

        if not touching:
            # Anchor is isolated from everything after it; move on.
            anchor_idx += 1
            continue

        # Replace the anchor and its neighbours by their common bounding box.
        cluster = np.array([anchor, *touching])
        union = np.array([
            cluster[:, 0].min(),
            cluster[:, 1].max(),
            cluster[:, 2].min(),
            cluster[:, 3].max(),
        ])
        separate.append(union)
        boxes = np.concatenate([boxes[:anchor_idx], separate])

        # The enlarged box may now reach boxes checked earlier, so start over.
        anchor_idx = 0

    return boxes
|
||||
|
||||
|
||||
def threshold_from_power(power):
    """Map the cropping power to a pixel threshold: 240 at power 0, 64 lower per unit."""
    drop = 64 * power
    return 240 - drop
|
||||
@@ -8,3 +8,4 @@ packaging>=23.2
|
||||
mozjpeg-lossless-optimization>=1.1.2
|
||||
natsort[fast]>=8.4.0
|
||||
distro>=1.8.0
|
||||
numpy>=1.22.4,<2.0.0
|
||||
|
||||
Reference in New Issue
Block a user