1
0
mirror of https://github.com/ciromattia/kcc synced 2025-12-12 17:26:23 +00:00

[A new image cropping algorithm]

1. Replaced both crop margins and crop margins & page num with newer algorithm.
2. Crop max power level increased to 3.0
3. Adds NumPy as a new dependency.
This commit is contained in:
neyney10
2024-10-18 17:02:40 +03:00
committed by Alex Xu
parent c26383c4b5
commit 4a661a1a17
8 changed files with 261 additions and 72 deletions

View File

@@ -6,7 +6,7 @@
<rect>
<x>0</x>
<y>0</y>
<width>450</width>
<width>481</width>
<height>400</height>
</rect>
</property>
@@ -242,7 +242,7 @@
<number>5</number>
</property>
<property name="orientation">
<enum>Qt::Horizontal</enum>
<enum>Qt::Orientation::Horizontal</enum>
</property>
</widget>
</item>
@@ -277,13 +277,13 @@
<item>
<widget class="QSlider" name="croppingPowerSlider">
<property name="maximum">
<number>200</number>
<number>300</number>
</property>
<property name="singleStep">
<number>1</number>
</property>
<property name="orientation">
<enum>Qt::Horizontal</enum>
<enum>Qt::Orientation::Horizontal</enum>
</property>
</widget>
</item>
@@ -489,13 +489,13 @@
<string notr="true">QListWidget#jobList {background:#ffffff;background-image:url(:/Other/icons/list_background.png);background-position:center center;background-repeat:no-repeat;color:rgb(0,0,0);}</string>
</property>
<property name="selectionMode">
<enum>QAbstractItemView::NoSelection</enum>
<enum>QAbstractItemView::SelectionMode::NoSelection</enum>
</property>
<property name="verticalScrollMode">
<enum>QAbstractItemView::ScrollPerPixel</enum>
<enum>QAbstractItemView::ScrollMode::ScrollPerPixel</enum>
</property>
<property name="horizontalScrollMode">
<enum>QAbstractItemView::ScrollPerPixel</enum>
<enum>QAbstractItemView::ScrollMode::ScrollPerPixel</enum>
</property>
</widget>
</item>
@@ -516,7 +516,7 @@
<bool>false</bool>
</property>
<property name="alignment">
<set>Qt::AlignJustify|Qt::AlignVCenter</set>
<set>Qt::AlignmentFlag::AlignJustify|Qt::AlignmentFlag::AlignVCenter</set>
</property>
</widget>
</item>

View File

@@ -1,6 +1,6 @@
# Resource object code (Python 3)
# Created by: object code
# Created by: The Resource Compiler for Qt version 6.5.2
# Created by: The Resource Compiler for Qt version 6.6.3
# WARNING! All changes made in this file will be lost!
from PySide6 import QtCore
@@ -11476,49 +11476,49 @@ qt_resource_struct = b"\
\x00\x00\x00X\x00\x02\x00\x00\x00\x04\x00\x00\x00\x07\
\x00\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\x01\xac\x00\x00\x00\x00\x00\x01\x00\x02&\xd7\
\x00\x00\x01\x88;p\xbcB\
\x00\x00\x01\x90(\xef\xc4\x03\
\x00\x00\x01\xea\x00\x00\x00\x00\x00\x01\x00\x02{q\
\x00\x00\x01\x88;p\xbcB\
\x00\x00\x01\x90(\xef\xc4\x00\
\x00\x00\x01\xd6\x00\x00\x00\x00\x00\x01\x00\x02Qv\
\x00\x00\x01\x88;p\xbcB\
\x00\x00\x01\x90(\xef\xc3\xff\
\x00\x00\x01\xc2\x00\x00\x00\x00\x00\x01\x00\x02F\x13\
\x00\x00\x01\x89\x89D9.\
\x00\x00\x01\x90(\xef\xc4\x01\
\x00\x00\x00X\x00\x02\x00\x00\x00\x03\x00\x00\x00\x0c\
\x00\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\x00\xa6\x00\x00\x00\x00\x00\x01\x00\x01(\x97\
\x00\x00\x01\x88;p\xbcB\
\x00\x00\x01\x90(\xef\xc4\x03\
\x00\x00\x00\x8c\x00\x00\x00\x00\x00\x01\x00\x01\x1d\x90\
\x00\x00\x01\x88;p\xbcB\
\x00\x00\x01\x90(\xef\xc4\x02\
\x00\x00\x00\xbc\x00\x00\x00\x00\x00\x01\x00\x011\xef\
\x00\x00\x01\x88;p\xbcB\
\x00\x00\x01\x90(\xef\xc4\x04\
\x00\x00\x00X\x00\x02\x00\x00\x00\x03\x00\x00\x00\x10\
\x00\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\x02.\x00\x00\x00\x00\x00\x01\x00\x02\xad\xbd\
\x00\x00\x01\x88;p\xbcJ\
\x00\x00\x01\x90(\xef\xc4!\
\x00\x00\x02\x00\x00\x00\x00\x00\x00\x01\x00\x02\x97\xc0\
\x00\x00\x01\x88;p\xbcI\
\x00\x00\x01\x90(\xef\xc4\x1d\
\x00\x00\x02\x16\x00\x00\x00\x00\x00\x01\x00\x02\xa1\x1d\
\x00\x00\x01\x88;p\xbcI\
\x00\x00\x01\x90(\xef\xc4\x19\
\x00\x00\x00X\x00\x02\x00\x00\x00\x07\x00\x00\x00\x14\
\x00\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\x01\x08\x00\x00\x00\x00\x00\x01\x00\x01H\x9b\
\x00\x00\x01\x88;p\xbcJ\
\x00\x00\x01\x90(\xef\xc4\x22\
\x00\x00\x01\x1e\x00\x00\x00\x00\x00\x01\x00\x01qC\
\x00\x00\x01\x88;p\xbcI\
\x00\x00\x01\x90(\xef\xc4\x1c\
\x00\x00\x01\x80\x00\x00\x00\x00\x00\x01\x00\x01\xca\x17\
\x00\x00\x01\x88;p\xbcI\
\x00\x00\x01\x90(\xef\xc4\x1e\
\x00\x00\x01f\x00\x00\x00\x00\x00\x01\x00\x01\x84\xd0\
\x00\x00\x01\x88;p\xbcH\
\x00\x00\x01\x90(\xef\xc4\x18\
\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x01\x00\x01D<\
\x00\x00\x01\x88;p\xbcF\
\x00\x00\x01\x90(\xef\xc4\x0e\
\x00\x00\x00\xd4\x00\x00\x00\x00\x00\x01\x00\x017\xd3\
\x00\x00\x01\x88;p\xbcH\
\x00\x00\x01\x90(\xef\xc4\x17\
\x00\x00\x01@\x00\x00\x00\x00\x00\x01\x00\x01z\x9a\
\x00\x00\x01\x88;p\xbcH\
\x00\x00\x01\x90(\xef\xc4\x18\
\x00\x00\x00X\x00\x02\x00\x00\x00\x01\x00\x00\x00\x1c\
\x00\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\x00h\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\
\x00\x00\x01\x88;p\xbcH\
\x00\x00\x01\x90(\xef\xc4\x16\
"
def qInitResources():

View File

@@ -3,7 +3,7 @@
################################################################################
## Form generated from reading UI file 'KCC.ui'
##
## Created by: Qt User Interface Compiler version 6.5.2
## Created by: Qt User Interface Compiler version 6.6.3
##
## WARNING! All changes made in this file will be lost when recompiling UI file!
################################################################################
@@ -26,7 +26,7 @@ class Ui_mainWindow(object):
def setupUi(self, mainWindow):
if not mainWindow.objectName():
mainWindow.setObjectName(u"mainWindow")
mainWindow.resize(450, 400)
mainWindow.resize(481, 400)
icon = QIcon()
icon.addFile(u":/Icon/icons/comic2ebook.png", QSize(), QIcon.Normal, QIcon.Off)
mainWindow.setWindowIcon(icon)
@@ -139,7 +139,7 @@ class Ui_mainWindow(object):
self.gammaSlider.setObjectName(u"gammaSlider")
self.gammaSlider.setMaximum(250)
self.gammaSlider.setSingleStep(5)
self.gammaSlider.setOrientation(Qt.Horizontal)
self.gammaSlider.setOrientation(Qt.Orientation.Horizontal)
self.horizontalLayout_2.addWidget(self.gammaSlider)
@@ -159,9 +159,9 @@ class Ui_mainWindow(object):
self.croppingPowerSlider = QSlider(self.croppingWidget)
self.croppingPowerSlider.setObjectName(u"croppingPowerSlider")
self.croppingPowerSlider.setMaximum(200)
self.croppingPowerSlider.setMaximum(300)
self.croppingPowerSlider.setSingleStep(1)
self.croppingPowerSlider.setOrientation(Qt.Horizontal)
self.croppingPowerSlider.setOrientation(Qt.Orientation.Horizontal)
self.horizontalLayout_3.addWidget(self.croppingPowerSlider)
@@ -170,7 +170,7 @@ class Ui_mainWindow(object):
self.buttonWidget = QWidget(self.centralWidget)
self.buttonWidget.setObjectName(u"buttonWidget")
sizePolicy = QSizePolicy(QSizePolicy.Preferred, QSizePolicy.Fixed)
sizePolicy = QSizePolicy(QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Fixed)
sizePolicy.setHorizontalStretch(0)
sizePolicy.setVerticalStretch(0)
sizePolicy.setHeightForWidth(self.buttonWidget.sizePolicy().hasHeightForWidth())
@@ -267,9 +267,9 @@ class Ui_mainWindow(object):
self.jobList = QListWidget(self.centralWidget)
self.jobList.setObjectName(u"jobList")
self.jobList.setStyleSheet(u"QListWidget#jobList {background:#ffffff;background-image:url(:/Other/icons/list_background.png);background-position:center center;background-repeat:no-repeat;color:rgb(0,0,0);}")
self.jobList.setSelectionMode(QAbstractItemView.NoSelection)
self.jobList.setVerticalScrollMode(QAbstractItemView.ScrollPerPixel)
self.jobList.setHorizontalScrollMode(QAbstractItemView.ScrollPerPixel)
self.jobList.setSelectionMode(QAbstractItemView.SelectionMode.NoSelection)
self.jobList.setVerticalScrollMode(QAbstractItemView.ScrollMode.ScrollPerPixel)
self.jobList.setHorizontalScrollMode(QAbstractItemView.ScrollMode.ScrollPerPixel)
self.gridLayout.addWidget(self.jobList, 2, 0, 1, 2)
@@ -278,7 +278,7 @@ class Ui_mainWindow(object):
self.progressBar.setMinimumSize(QSize(0, 30))
self.progressBar.setFont(font)
self.progressBar.setVisible(False)
self.progressBar.setAlignment(Qt.AlignJustify|Qt.AlignVCenter)
self.progressBar.setAlignment(Qt.AlignmentFlag.AlignJustify|Qt.AlignmentFlag.AlignVCenter)
self.gridLayout.addWidget(self.progressBar, 1, 0, 1, 2)
@@ -290,7 +290,7 @@ class Ui_mainWindow(object):
self.gridLayout_3.setContentsMargins(0, 0, 0, 0)
self.hLabel = QLabel(self.customWidget)
self.hLabel.setObjectName(u"hLabel")
sizePolicy1 = QSizePolicy(QSizePolicy.Fixed, QSizePolicy.Preferred)
sizePolicy1 = QSizePolicy(QSizePolicy.Policy.Fixed, QSizePolicy.Policy.Preferred)
sizePolicy1.setHorizontalStretch(0)
sizePolicy1.setVerticalStretch(0)
sizePolicy1.setHeightForWidth(self.hLabel.sizePolicy().hasHeightForWidth())

View File

@@ -3,7 +3,7 @@
################################################################################
## Form generated from reading UI file 'MetaEditor.ui'
##
## Created by: Qt User Interface Compiler version 6.5.2
## Created by: Qt User Interface Compiler version 6.6.3
##
## WARNING! All changes made in this file will be lost when recompiling UI file!
################################################################################
@@ -117,7 +117,7 @@ class Ui_editorDialog(object):
self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
self.statusLabel = QLabel(self.optionWidget)
self.statusLabel.setObjectName(u"statusLabel")
sizePolicy = QSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.MinimumExpanding)
sizePolicy = QSizePolicy(QSizePolicy.Policy.MinimumExpanding, QSizePolicy.Policy.MinimumExpanding)
sizePolicy.setHorizontalStretch(0)
sizePolicy.setVerticalStretch(0)
sizePolicy.setHeightForWidth(self.statusLabel.sizePolicy().hasHeightForWidth())

View File

@@ -23,6 +23,7 @@ import os
import mozjpeg_lossless_optimization
from PIL import Image, ImageOps, ImageStat, ImageChops, ImageFilter
from .shared import md5Checksum
from .page_number_crop_alg import get_bbox_crop_margin_page_number, get_bbox_crop_margin
AUTO_CROP_THRESHOLD = 0.015
@@ -358,20 +359,6 @@ class ComicPage:
else:
return Image.Resampling.LANCZOS
def getBoundingBox(self, tmptmg):
min_margin = [int(0.005 * i + 0.5) for i in tmptmg.size]
max_margin = [int(0.1 * i + 0.5) for i in tmptmg.size]
bbox = tmptmg.getbbox()
bbox = (
max(0, min(max_margin[0], bbox[0] - min_margin[0])),
max(0, min(max_margin[1], bbox[1] - min_margin[1])),
min(tmptmg.size[0],
max(tmptmg.size[0] - max_margin[0], bbox[2] + min_margin[0])),
min(tmptmg.size[1],
max(tmptmg.size[1] - max_margin[1], bbox[3] + min_margin[1])),
)
return bbox
def maybeCrop(self, box, minimum):
box_area = (box[2] - box[0]) * (box[3] - box[1])
image_area = self.image.size[0] * self.image.size[1]
@@ -379,26 +366,16 @@ class ComicPage:
self.image = self.image.crop(box)
def cropPageNumber(self, power, minimum):
if self.fill != 'white':
tmptmg = self.image.convert(mode='L')
else:
tmptmg = ImageOps.invert(self.image.convert(mode='L'))
tmptmg = tmptmg.point(lambda x: x and 255)
tmptmg = tmptmg.filter(ImageFilter.MinFilter(size=3))
tmptmg = tmptmg.filter(ImageFilter.GaussianBlur(radius=5))
tmptmg = tmptmg.point(lambda x: (x >= 16 * power) and x)
if tmptmg.getbbox():
self.maybeCrop(tmptmg.getbbox(), minimum)
bbox = get_bbox_crop_margin_page_number(self.image, power, self.fill)
if bbox:
self.maybeCrop(bbox, minimum)
def cropMargin(self, power, minimum):
if self.fill != 'white':
tmptmg = self.image.convert(mode='L')
else:
tmptmg = ImageOps.invert(self.image.convert(mode='L'))
tmptmg = tmptmg.filter(ImageFilter.GaussianBlur(radius=3))
tmptmg = tmptmg.point(lambda x: (x >= 16 * power) and x)
if tmptmg.getbbox():
self.maybeCrop(self.getBoundingBox(tmptmg), minimum)
bbox = get_bbox_crop_margin(self.image, power, self.fill)
if bbox:
self.maybeCrop(bbox, minimum)
class Cover:

View File

@@ -0,0 +1,210 @@
from PIL import ImageOps, ImageFilter
import numpy as np
'''
Some assupmptions on the page number sizes
We assume that the size of the number (including all digits) is between
'min_shape_size_tolerated_size' and 'max_shape_size_tolerated_size' relative to the image size.
We assume the distance between the digit is no more than 'max_dist_size' (x,y), and no more than 3 digits.
'''
max_shape_size_tolerated_size = (0.015*3, 0.02) # percent
min_shape_size_tolerated_size = (0.003, 0.006) # percent
window_h_size = max_shape_size_tolerated_size[1]*1.25 # percent
max_dist_size = (0.01, 0.002) # percent
'''
E-reader screen real-estate is an important resource.
More available screensize means more details can be better seen, especially text.
Text is one of the most important elements that need to be clearly readable on e-readers,
which mostly are smaller devices where the need to zoom is unwanted.
By cropping the page number on the bottom of the page, 2%-5% of the page height can be regained
that allows us to upscale the image even more.
- Most of the times the screen height is the limiting factor in upscaling, rather than its width.
Parameters:
img (PIL image): A PIL image.
power (float): The power to 'chop' through pixels matching the background. Values in range[0,3].
background_color (string): 'white' for white background, anything else for black.
Returns:
bbox (4-tuple, left|top|right|bot): The tightest bounding box calculated after trying to remove the bottom page number. Returns None if couldnt find anything satisfactory
'''
def get_bbox_crop_margin_page_number(img, power=1, background_color='white'):
if img.mode != 'L':
img = ImageOps.grayscale(img)
if background_color != 'white':
img = ImageOps.invert(img)
'''
Autocontrast: due to some threshold values, it's important that the blacks will be blacks and white will be whites.
Box/MeanFilter: Allows us to reduce noise like bad a page scan or compression artifacts.
Note: MedianFilter works better in my experience, but takes 2x-3x longer to perform.
'''
img = ImageOps.autocontrast(img, 1).filter(ImageFilter.BoxBlur(1))
'''
The 'power' parameters determines the threshold. The higher the power, the more "force" it can crop through black pixels (in case of white background)
and the lower the power, more sensitive to black pixels.
'''
threshold = threshold_from_power(power)
bw_img = img.point(lambda p: 255 if p <= threshold else 0)
left, top_y_pos, right, bot_y_pos = bw_img.getbbox()
'''
We inspect the lower bottom part of the image where we suspect might be a page number.
We assume that page number consist of 1 to 3 digits and the total min and max size of the number
is between 'min_shape_size_tolerated_size' and 'max_shape_size_tolerated_size'.
'''
window_h = int(img.size[1] * window_h_size)
img_part = img.crop((left,bot_y_pos-window_h, right, bot_y_pos))
'''
We detect related-pixels by proximity, with max distance defined in 'max_dist_size'.
Related pixels (in x axis) for each image-row are then merged to boxes with adjacent rows (in y axis)
to form bounding boxes of the detected objects (which one of them could be the page number).
'''
img_part_mat = np.array(img_part)
window_groups = []
for i in range(img_part.size[1]):
row_groups = [(g[0], g[1], i, i) for g in group_pixels(img_part_mat[i], img.size[0]*max_dist_size[0], threshold)]
window_groups.extend(row_groups)
window_groups = np.array(window_groups)
boxes = merge_boxes(window_groups, (img.size[0]*max_dist_size[0], img.size[1]*max_dist_size[1]))
'''
We assume that the lowest part of the image that has black pixels on is the page number.
In case that there are more than one detected object in the loewst part, we assume that one of them is probably
manga-content and shouldn't be cropped.
'''
# filter all small objects
boxes = list(filter(lambda box: box[1]-box[0] >= img.size[0]*min_shape_size_tolerated_size[0]
and box[3]-box[2] >= img.size[1]*min_shape_size_tolerated_size[1], boxes))
lowest_boxes = list(filter(lambda box: box[3] == window_h-1, boxes))
min_y_of_lowest_boxes = 0
if len(lowest_boxes) > 0:
min_y_of_lowest_boxes = np.min(np.array(lowest_boxes)[:,2])
boxes_in_same_y_range = list(filter(lambda box: box[3] >= min_y_of_lowest_boxes, boxes))
max_shape_size_tolerated = (img.size[0] * max_shape_size_tolerated_size[0],
max(img.size[1] *max_shape_size_tolerated_size[1], 3))
should_force_crop = (
len(boxes_in_same_y_range) == 1
and (boxes_in_same_y_range[0][1] - boxes_in_same_y_range[0][0] <= max_shape_size_tolerated[0])
and (boxes_in_same_y_range[0][3] - boxes_in_same_y_range[0][2] <= max_shape_size_tolerated[1])
)
cropped_bbox = (0, 0, img.size[0], img.size[1])
if should_force_crop:
cropped_bbox = (0, 0, img.size[0], bot_y_pos-(window_h-boxes_in_same_y_range[0][2]+1))
cropped_bbox = bw_img.crop(cropped_bbox).getbbox()
return cropped_bbox
'''
Parameters:
img (PIL image): A PIL image.
power (float): The power to 'chop' through pixels matching the background. Values in range[0,3].
background_color (string): 'white' for white background, anything else for black.
Returns:
bbox (4-tuple, left|top|right|bot): The tightest bounding box calculated after trying to remove the bottom page number. Returns None if couldnt find anything satisfactory
'''
def get_bbox_crop_margin(img, power=1, background_color='white'):
if img.mode != 'L':
img = ImageOps.grayscale(img)
if background_color != 'white':
img = ImageOps.invert(img)
'''
Autocontrast: due to some threshold values, it's important that the blacks will be blacks and white will be whites.
Box/MeanFilter: Allows us to reduce noise like bad a page scan or compression artifacts.
Note: MedianFilter works better in my experience, but takes 2x-3x longer to perform.
'''
img = ImageOps.autocontrast(img, 1).filter(ImageFilter.BoxBlur(1))
'''
The 'power' parameters determines the threshold. The higher the power, the more "force" it can crop through black pixels (in case of white background)
and the lower the power, more sensitive to black pixels.
'''
threshold = threshold_from_power(power)
bw_img = img.point(lambda p: 255 if p <= threshold else 0)
return bw_img.getbbox()
'''
Groups close pixels together (x axis)
'''
def group_pixels(row, max_dist_tolerated, threshold):
groups = []
idx = np.where(row <= threshold)[0]
group_start = -1
group_end = 0
for i in range(len(idx)):
dist = idx[i] - group_end
if group_start == -1:
group_start = idx[i]
group_end = idx[i]
elif dist <= max_dist_tolerated:
group_end = idx[i]
else:
groups.append((group_start, group_end))
group_start = -1
group_end = -1
if group_start != -1:
groups.append((group_start, group_end))
return groups
def box_intersect(box1, box2, max_dist):
return not (box2[0]-max_dist[0] > box1[1]
or box2[1]+max_dist[0] < box1[0]
or box2[2]-max_dist[1] > box1[3]
or box2[3]+max_dist[1] < box1[2])
'''
Merge close bounding boxes (left,right, top,bot) (x axis) with distance threshold defined in
'max_dist_tolerated'. Boxes with less 'max_dist_tolerated' distance (Chebyshev distance).
'''
def merge_boxes(boxes, max_dist_tolerated):
j = 0
while j < len(boxes)-1:
g1 = boxes[j]
intersecting_boxes = []
other_boxes = []
for i in range(j+1,len(boxes)):
g2 = boxes[i]
if box_intersect(g1,g2, max_dist_tolerated):
intersecting_boxes.append(g2)
else:
other_boxes.append(g2)
if len(intersecting_boxes) > 0:
intersecting_boxes = np.array([g1, *intersecting_boxes])
merged_box = np.array([
np.min(intersecting_boxes[:,0]),
np.max(intersecting_boxes[:,1]),
np.min(intersecting_boxes[:,2]),
np.max(intersecting_boxes[:,3])
])
other_boxes.append(merged_box)
boxes = np.concatenate([boxes[:j], other_boxes])
j = 0
else:
j += 1
return boxes
def threshold_from_power(power):
return 240-(power*64)

View File

@@ -8,3 +8,4 @@ packaging>=23.2
mozjpeg-lossless-optimization>=1.1.2
natsort[fast]>=8.4.0
distro>=1.8.0
numpy>=1.22.4,<2.0.0

View File

@@ -83,6 +83,7 @@ setuptools.setup(
'mozjpeg-lossless-optimization>=1.1.2',
'natsort[fast]>=8.4.0',
'distro',
'numpy>=1.22.4,<2.0.0'
],
classifiers=[],
zip_safe=False,