[A new image cropping algorithm]

1. Replaces both the "crop margins" and the "crop margins & page number" modes with a newer algorithm. 2. Increases the maximum cropping power level to 3.0. 3. Adds NumPy as a new dependency.
2026-01-28 07:57:41 +00:00 · 2024-10-18 17:02:40 +03:00
parent c26383c4b5
commit 4a661a1a17
8 changed files with 261 additions and 72 deletions
--- a/gui/KCC.ui
+++ b/gui/KCC.ui
@@ -6,7 +6,7 @@
   <rect>
    <x>0</x>
    <y>0</y>
-    <width>450</width>
+    <width>481</width>
    <height>400</height>
   </rect>
  </property>
@@ -242,7 +242,7 @@
          <number>5</number>
         </property>
         <property name="orientation">
-          <enum>Qt::Horizontal</enum>
+          <enum>Qt::Orientation::Horizontal</enum>
         </property>
        </widget>
       </item>
@@ -277,13 +277,13 @@
       <item>
        <widget class="QSlider" name="croppingPowerSlider">
         <property name="maximum">
-          <number>200</number>
+          <number>300</number>
         </property>
         <property name="singleStep">
          <number>1</number>
         </property>
         <property name="orientation">
-          <enum>Qt::Horizontal</enum>
+          <enum>Qt::Orientation::Horizontal</enum>
         </property>
        </widget>
       </item>
@@ -489,13 +489,13 @@
       <string notr="true">QListWidget#jobList {background:#ffffff;background-image:url(:/Other/icons/list_background.png);background-position:center center;background-repeat:no-repeat;color:rgb(0,0,0);}</string>
      </property>
      <property name="selectionMode">
-       <enum>QAbstractItemView::NoSelection</enum>
+       <enum>QAbstractItemView::SelectionMode::NoSelection</enum>
      </property>
      <property name="verticalScrollMode">
-       <enum>QAbstractItemView::ScrollPerPixel</enum>
+       <enum>QAbstractItemView::ScrollMode::ScrollPerPixel</enum>
      </property>
      <property name="horizontalScrollMode">
-       <enum>QAbstractItemView::ScrollPerPixel</enum>
+       <enum>QAbstractItemView::ScrollMode::ScrollPerPixel</enum>
      </property>
     </widget>
    </item>
@@ -516,7 +516,7 @@
       <bool>false</bool>
      </property>
      <property name="alignment">
-       <set>Qt::AlignJustify|Qt::AlignVCenter</set>
+       <set>Qt::AlignmentFlag::AlignJustify|Qt::AlignmentFlag::AlignVCenter</set>
      </property>
     </widget>
    </item>
--- a/kindlecomicconverter/KCC_rc.py
+++ b/kindlecomicconverter/KCC_rc.py
@@ -1,6 +1,6 @@
 # Resource object code (Python 3)
 # Created by: object code
-# Created by: The Resource Compiler for Qt version 6.5.2
+# Created by: The Resource Compiler for Qt version 6.6.3
 # WARNING! All changes made in this file will be lost!

 from PySide6 import QtCore
@@ -11476,49 +11476,49 @@ qt_resource_struct = b"\
 \x00\x00\x00X\x00\x02\x00\x00\x00\x04\x00\x00\x00\x07\
 \x00\x00\x00\x00\x00\x00\x00\x00\
 \x00\x00\x01\xac\x00\x00\x00\x00\x00\x01\x00\x02&\xd7\
-\x00\x00\x01\x88;p\xbcB\
+\x00\x00\x01\x90(\xef\xc4\x03\
 \x00\x00\x01\xea\x00\x00\x00\x00\x00\x01\x00\x02{q\
-\x00\x00\x01\x88;p\xbcB\
+\x00\x00\x01\x90(\xef\xc4\x00\
 \x00\x00\x01\xd6\x00\x00\x00\x00\x00\x01\x00\x02Qv\
-\x00\x00\x01\x88;p\xbcB\
+\x00\x00\x01\x90(\xef\xc3\xff\
 \x00\x00\x01\xc2\x00\x00\x00\x00\x00\x01\x00\x02F\x13\
-\x00\x00\x01\x89\x89D9.\
+\x00\x00\x01\x90(\xef\xc4\x01\
 \x00\x00\x00X\x00\x02\x00\x00\x00\x03\x00\x00\x00\x0c\
 \x00\x00\x00\x00\x00\x00\x00\x00\
 \x00\x00\x00\xa6\x00\x00\x00\x00\x00\x01\x00\x01(\x97\
-\x00\x00\x01\x88;p\xbcB\
+\x00\x00\x01\x90(\xef\xc4\x03\
 \x00\x00\x00\x8c\x00\x00\x00\x00\x00\x01\x00\x01\x1d\x90\
-\x00\x00\x01\x88;p\xbcB\
+\x00\x00\x01\x90(\xef\xc4\x02\
 \x00\x00\x00\xbc\x00\x00\x00\x00\x00\x01\x00\x011\xef\
-\x00\x00\x01\x88;p\xbcB\
+\x00\x00\x01\x90(\xef\xc4\x04\
 \x00\x00\x00X\x00\x02\x00\x00\x00\x03\x00\x00\x00\x10\
 \x00\x00\x00\x00\x00\x00\x00\x00\
 \x00\x00\x02.\x00\x00\x00\x00\x00\x01\x00\x02\xad\xbd\
-\x00\x00\x01\x88;p\xbcJ\
+\x00\x00\x01\x90(\xef\xc4!\
 \x00\x00\x02\x00\x00\x00\x00\x00\x00\x01\x00\x02\x97\xc0\
-\x00\x00\x01\x88;p\xbcI\
+\x00\x00\x01\x90(\xef\xc4\x1d\
 \x00\x00\x02\x16\x00\x00\x00\x00\x00\x01\x00\x02\xa1\x1d\
-\x00\x00\x01\x88;p\xbcI\
+\x00\x00\x01\x90(\xef\xc4\x19\
 \x00\x00\x00X\x00\x02\x00\x00\x00\x07\x00\x00\x00\x14\
 \x00\x00\x00\x00\x00\x00\x00\x00\
 \x00\x00\x01\x08\x00\x00\x00\x00\x00\x01\x00\x01H\x9b\
-\x00\x00\x01\x88;p\xbcJ\
+\x00\x00\x01\x90(\xef\xc4\x22\
 \x00\x00\x01\x1e\x00\x00\x00\x00\x00\x01\x00\x01qC\
-\x00\x00\x01\x88;p\xbcI\
+\x00\x00\x01\x90(\xef\xc4\x1c\
 \x00\x00\x01\x80\x00\x00\x00\x00\x00\x01\x00\x01\xca\x17\
-\x00\x00\x01\x88;p\xbcI\
+\x00\x00\x01\x90(\xef\xc4\x1e\
 \x00\x00\x01f\x00\x00\x00\x00\x00\x01\x00\x01\x84\xd0\
-\x00\x00\x01\x88;p\xbcH\
+\x00\x00\x01\x90(\xef\xc4\x18\
 \x00\x00\x00\xf0\x00\x00\x00\x00\x00\x01\x00\x01D<\
-\x00\x00\x01\x88;p\xbcF\
+\x00\x00\x01\x90(\xef\xc4\x0e\
 \x00\x00\x00\xd4\x00\x00\x00\x00\x00\x01\x00\x017\xd3\
-\x00\x00\x01\x88;p\xbcH\
+\x00\x00\x01\x90(\xef\xc4\x17\
 \x00\x00\x01@\x00\x00\x00\x00\x00\x01\x00\x01z\x9a\
-\x00\x00\x01\x88;p\xbcH\
+\x00\x00\x01\x90(\xef\xc4\x18\
 \x00\x00\x00X\x00\x02\x00\x00\x00\x01\x00\x00\x00\x1c\
 \x00\x00\x00\x00\x00\x00\x00\x00\
 \x00\x00\x00h\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\
-\x00\x00\x01\x88;p\xbcH\
+\x00\x00\x01\x90(\xef\xc4\x16\
 "

 def qInitResources():
--- a/kindlecomicconverter/KCC_ui.py
+++ b/kindlecomicconverter/KCC_ui.py
@@ -3,7 +3,7 @@
 ################################################################################
 ## Form generated from reading UI file 'KCC.ui'
 ##
-## Created by: Qt User Interface Compiler version 6.5.2
+## Created by: Qt User Interface Compiler version 6.6.3
 ##
 ## WARNING! All changes made in this file will be lost when recompiling UI file!
 ################################################################################
@@ -26,7 +26,7 @@ class Ui_mainWindow(object):
    def setupUi(self, mainWindow):
        if not mainWindow.objectName():
            mainWindow.setObjectName(u"mainWindow")
-        mainWindow.resize(450, 400)
+        mainWindow.resize(481, 400)
        icon = QIcon()
        icon.addFile(u":/Icon/icons/comic2ebook.png", QSize(), QIcon.Normal, QIcon.Off)
        mainWindow.setWindowIcon(icon)
@@ -139,7 +139,7 @@ class Ui_mainWindow(object):
        self.gammaSlider.setObjectName(u"gammaSlider")
        self.gammaSlider.setMaximum(250)
        self.gammaSlider.setSingleStep(5)
-        self.gammaSlider.setOrientation(Qt.Horizontal)
+        self.gammaSlider.setOrientation(Qt.Orientation.Horizontal)

        self.horizontalLayout_2.addWidget(self.gammaSlider)

@@ -159,9 +159,9 @@ class Ui_mainWindow(object):

        self.croppingPowerSlider = QSlider(self.croppingWidget)
        self.croppingPowerSlider.setObjectName(u"croppingPowerSlider")
-        self.croppingPowerSlider.setMaximum(200)
+        self.croppingPowerSlider.setMaximum(300)
        self.croppingPowerSlider.setSingleStep(1)
-        self.croppingPowerSlider.setOrientation(Qt.Horizontal)
+        self.croppingPowerSlider.setOrientation(Qt.Orientation.Horizontal)

        self.horizontalLayout_3.addWidget(self.croppingPowerSlider)

@@ -170,7 +170,7 @@ class Ui_mainWindow(object):

        self.buttonWidget = QWidget(self.centralWidget)
        self.buttonWidget.setObjectName(u"buttonWidget")
-        sizePolicy = QSizePolicy(QSizePolicy.Preferred, QSizePolicy.Fixed)
+        sizePolicy = QSizePolicy(QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.buttonWidget.sizePolicy().hasHeightForWidth())
@@ -267,9 +267,9 @@ class Ui_mainWindow(object):
        self.jobList = QListWidget(self.centralWidget)
        self.jobList.setObjectName(u"jobList")
        self.jobList.setStyleSheet(u"QListWidget#jobList {background:#ffffff;background-image:url(:/Other/icons/list_background.png);background-position:center center;background-repeat:no-repeat;color:rgb(0,0,0);}")
-        self.jobList.setSelectionMode(QAbstractItemView.NoSelection)
-        self.jobList.setVerticalScrollMode(QAbstractItemView.ScrollPerPixel)
-        self.jobList.setHorizontalScrollMode(QAbstractItemView.ScrollPerPixel)
+        self.jobList.setSelectionMode(QAbstractItemView.SelectionMode.NoSelection)
+        self.jobList.setVerticalScrollMode(QAbstractItemView.ScrollMode.ScrollPerPixel)
+        self.jobList.setHorizontalScrollMode(QAbstractItemView.ScrollMode.ScrollPerPixel)

        self.gridLayout.addWidget(self.jobList, 2, 0, 1, 2)

@@ -278,7 +278,7 @@ class Ui_mainWindow(object):
        self.progressBar.setMinimumSize(QSize(0, 30))
        self.progressBar.setFont(font)
        self.progressBar.setVisible(False)
-        self.progressBar.setAlignment(Qt.AlignJustify|Qt.AlignVCenter)
+        self.progressBar.setAlignment(Qt.AlignmentFlag.AlignJustify|Qt.AlignmentFlag.AlignVCenter)

        self.gridLayout.addWidget(self.progressBar, 1, 0, 1, 2)

@@ -290,7 +290,7 @@ class Ui_mainWindow(object):
        self.gridLayout_3.setContentsMargins(0, 0, 0, 0)
        self.hLabel = QLabel(self.customWidget)
        self.hLabel.setObjectName(u"hLabel")
-        sizePolicy1 = QSizePolicy(QSizePolicy.Fixed, QSizePolicy.Preferred)
+        sizePolicy1 = QSizePolicy(QSizePolicy.Policy.Fixed, QSizePolicy.Policy.Preferred)
        sizePolicy1.setHorizontalStretch(0)
        sizePolicy1.setVerticalStretch(0)
        sizePolicy1.setHeightForWidth(self.hLabel.sizePolicy().hasHeightForWidth())
--- a/kindlecomicconverter/KCC_ui_editor.py
+++ b/kindlecomicconverter/KCC_ui_editor.py
@@ -3,7 +3,7 @@
 ################################################################################
 ## Form generated from reading UI file 'MetaEditor.ui'
 ##
-## Created by: Qt User Interface Compiler version 6.5.2
+## Created by: Qt User Interface Compiler version 6.6.3
 ##
 ## WARNING! All changes made in this file will be lost when recompiling UI file!
 ################################################################################
@@ -117,7 +117,7 @@ class Ui_editorDialog(object):
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.statusLabel = QLabel(self.optionWidget)
        self.statusLabel.setObjectName(u"statusLabel")
-        sizePolicy = QSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.MinimumExpanding)
+        sizePolicy = QSizePolicy(QSizePolicy.Policy.MinimumExpanding, QSizePolicy.Policy.MinimumExpanding)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.statusLabel.sizePolicy().hasHeightForWidth())
--- a/kindlecomicconverter/image.py
+++ b/kindlecomicconverter/image.py
@@ -23,6 +23,7 @@ import os
 import mozjpeg_lossless_optimization
 from PIL import Image, ImageOps, ImageStat, ImageChops, ImageFilter
 from .shared import md5Checksum
+from .page_number_crop_alg import get_bbox_crop_margin_page_number, get_bbox_crop_margin

 AUTO_CROP_THRESHOLD = 0.015

@@ -358,20 +359,6 @@ class ComicPage:
        else:
            return Image.Resampling.LANCZOS

-    def getBoundingBox(self, tmptmg):
-        min_margin = [int(0.005 * i + 0.5) for i in tmptmg.size]
-        max_margin = [int(0.1 * i + 0.5) for i in tmptmg.size]
-        bbox = tmptmg.getbbox()
-        bbox = (
-            max(0, min(max_margin[0], bbox[0] - min_margin[0])),
-            max(0, min(max_margin[1], bbox[1] - min_margin[1])),
-            min(tmptmg.size[0],
-                max(tmptmg.size[0] - max_margin[0], bbox[2] + min_margin[0])),
-            min(tmptmg.size[1],
-                max(tmptmg.size[1] - max_margin[1], bbox[3] + min_margin[1])),
-        )
-        return bbox
-
    def maybeCrop(self, box, minimum):
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        image_area = self.image.size[0] * self.image.size[1]
@@ -379,26 +366,16 @@ class ComicPage:
            self.image = self.image.crop(box)

    def cropPageNumber(self, power, minimum):
-        if self.fill != 'white':
-            tmptmg = self.image.convert(mode='L')
-        else:
-            tmptmg = ImageOps.invert(self.image.convert(mode='L'))
-        tmptmg = tmptmg.point(lambda x: x and 255)
-        tmptmg = tmptmg.filter(ImageFilter.MinFilter(size=3))
-        tmptmg = tmptmg.filter(ImageFilter.GaussianBlur(radius=5))
-        tmptmg = tmptmg.point(lambda x: (x >= 16 * power) and x)
-        if tmptmg.getbbox():
-            self.maybeCrop(tmptmg.getbbox(), minimum)
+        bbox = get_bbox_crop_margin_page_number(self.image, power, self.fill)
+        
+        if bbox:
+            self.maybeCrop(bbox, minimum)

    def cropMargin(self, power, minimum):
-        if self.fill != 'white':
-            tmptmg = self.image.convert(mode='L')
-        else:
-            tmptmg = ImageOps.invert(self.image.convert(mode='L'))
-        tmptmg = tmptmg.filter(ImageFilter.GaussianBlur(radius=3))
-        tmptmg = tmptmg.point(lambda x: (x >= 16 * power) and x)
-        if tmptmg.getbbox():
-            self.maybeCrop(self.getBoundingBox(tmptmg), minimum)
+        bbox = get_bbox_crop_margin(self.image, power, self.fill)
+        
+        if bbox:
+            self.maybeCrop(bbox, minimum)


 class Cover:
--- a/kindlecomicconverter/page_number_crop_alg.py
+++ b/kindlecomicconverter/page_number_crop_alg.py
@@ -0,0 +1,210 @@
+from PIL import ImageOps, ImageFilter
+import numpy as np
+
+'''
+Some assumptions about page number sizes.
+We assume that the size of the number (including all digits) is between
+'min_shape_size_tolerated_size' and 'max_shape_size_tolerated_size' relative to the image size.
+We assume the distance between digits is no more than 'max_dist_size' (x, y), and that there are no more than 3 digits.
+'''
+max_shape_size_tolerated_size = (0.015*3, 0.02) # (width, height) as a fraction of the image size (0.02 = 2%)
+min_shape_size_tolerated_size = (0.003, 0.006)  # (width, height) as a fraction of the image size
+window_h_size = max_shape_size_tolerated_size[1]*1.25 # height of the inspected bottom strip, as a fraction of the image height
+max_dist_size = (0.01, 0.002) # (x, y) max gap between related pixels, as a fraction of the image size
+
+
+'''
+E-reader screen real estate is an important resource.
+A larger usable screen area means details can be seen more clearly, especially text.
+Text is one of the most important elements that need to be clearly readable on e-readers,
+which are mostly smaller devices where having to zoom is undesirable.
+
+By cropping the page number at the bottom of the page, 2%-5% of the page height can be regained,
+which allows us to upscale the image even more.
+- Most of the time the screen height is the limiting factor in upscaling, rather than its width.
+
+    Parameters:
+            img (PIL image): A PIL image.
+            power (float): The power to 'chop' through pixels matching the background. Values in range [0,3].
+            background_color (string): 'white' for white background, anything else for black.
+    Returns:
+            bbox (4-tuple, left|top|right|bot): The tightest bounding box calculated after trying to remove the bottom page number. Returns None if it couldn't find anything satisfactory.
+'''
+def get_bbox_crop_margin_page_number(img, power=1, background_color='white'):
+    if img.mode != 'L':
+        img = ImageOps.grayscale(img)
+
+    if background_color != 'white':
+        img = ImageOps.invert(img)
+
+    '''
+    Autocontrast: due to some threshold values, it's important that the blacks will be blacks and white will be whites.
+    Box/MeanFilter: Allows us to reduce noise like a bad page scan or compression artifacts.
+    Note: MedianFilter works better in my experience, but takes 2x-3x longer to perform.
+    '''
+    img = ImageOps.autocontrast(img, 1).filter(ImageFilter.BoxBlur(1))
+
+    '''
+    The 'power' parameter determines the threshold. The higher the power, the more "force" it can crop through black pixels (in case of white background)
+    and the lower the power, more sensitive to black pixels.
+    '''
+    threshold = threshold_from_power(power)
+    bw_img = img.point(lambda p: 255 if p <= threshold else 0)
+    content_bbox = bw_img.getbbox()
+    if content_bbox is None:
+        return None  # blank page: no pixel passed the threshold, so there is nothing to crop
+    left, top_y_pos, right, bot_y_pos = content_bbox
+
+    # Inspect the bottom strip of the content, where a page number (assumed to be 1 to 3 digits, with a total size
+    # between 'min_shape_size_tolerated_size' and 'max_shape_size_tolerated_size') is suspected to be.
+    window_h = int(img.size[1] * window_h_size)
+    img_part = img.crop((left,bot_y_pos-window_h, right, bot_y_pos))
+
+    '''
+    We detect related-pixels by proximity, with max distance defined in 'max_dist_size'.
+    Related pixels (in x axis) for each image-row are then merged to boxes with adjacent rows (in y axis)
+    to form bounding boxes of the detected objects (which one of them could be the page number).
+    '''
+    img_part_mat = np.array(img_part)
+    window_groups = []
+    for i in range(img_part.size[1]):
+        row_groups = [(g[0], g[1], i, i) for g in group_pixels(img_part_mat[i], img.size[0]*max_dist_size[0], threshold)]
+        window_groups.extend(row_groups)
+
+    window_groups = np.array(window_groups)
+
+    boxes = merge_boxes(window_groups, (img.size[0]*max_dist_size[0], img.size[1]*max_dist_size[1]))
+    '''
+    We assume that the lowest part of the image that has black pixels on is the page number.
+    In case that there are more than one detected object in the lowest part, we assume that one of them is probably
+    manga-content and shouldn't be cropped.
+    '''
+    # filter all small objects
+    boxes = list(filter(lambda box: box[1]-box[0] >= img.size[0]*min_shape_size_tolerated_size[0] 
+                        and box[3]-box[2] >= img.size[1]*min_shape_size_tolerated_size[1], boxes))
+    lowest_boxes = list(filter(lambda box: box[3] == window_h-1, boxes))
+
+    min_y_of_lowest_boxes = 0
+    if len(lowest_boxes) > 0:
+        min_y_of_lowest_boxes = np.min(np.array(lowest_boxes)[:,2])
+
+    boxes_in_same_y_range = list(filter(lambda box: box[3] >= min_y_of_lowest_boxes, boxes))
+
+    max_shape_size_tolerated = (img.size[0] * max_shape_size_tolerated_size[0], 
+                                max(img.size[1] *max_shape_size_tolerated_size[1], 3))
+
+    should_force_crop = (
+        len(boxes_in_same_y_range) == 1
+        and (boxes_in_same_y_range[0][1] - boxes_in_same_y_range[0][0] <= max_shape_size_tolerated[0])
+        and (boxes_in_same_y_range[0][3] - boxes_in_same_y_range[0][2] <= max_shape_size_tolerated[1])
+    )
+
+    cropped_bbox = (0, 0, img.size[0], img.size[1])
+    if should_force_crop:
+        cropped_bbox = (0, 0, img.size[0], bot_y_pos-(window_h-boxes_in_same_y_range[0][2]+1))
+
+    cropped_bbox = bw_img.crop(cropped_bbox).getbbox()
+
+    return cropped_bbox
+
+
+'''
+    Parameters:
+            img (PIL image): A PIL image.
+            power (float): The power to 'chop' through pixels matching the background. Values in range [0,3].
+            background_color (string): 'white' for white background, anything else for black.
+    Returns:
+            bbox (4-tuple, left|top|right|bot): The tightest bounding box around the page content, i.e. the page with its margins cropped. Returns None if no content was found.
+'''
+def get_bbox_crop_margin(img, power=1, background_color='white'):
+    # Work on a grayscale version of the page; a non-white (dark) background is inverted
+    # so that, from here on, the background is always the bright side.
+    gray = img if img.mode == 'L' else ImageOps.grayscale(img)
+    if background_color != 'white':
+        gray = ImageOps.invert(gray)
+
+    # Autocontrast (cutoff 1) stretches the histogram so the fixed threshold below sees
+    # real blacks and whites; the radius-1 box blur damps noise such as a bad page scan
+    # or compression artifacts. (Original author's note: a median filter works better
+    # but takes 2x-3x longer.)
+    stretched = ImageOps.autocontrast(gray, 1)
+    smoothed = stretched.filter(ImageFilter.BoxBlur(1))
+
+    # A higher 'power' lowers the threshold, so the crop can cut through more dark pixels;
+    # a lower 'power' makes the crop more sensitive to dark pixels.
+    threshold = threshold_from_power(power)
+
+    def as_content_mask(p):
+        return 255 if p <= threshold else 0
+
+    return smoothed.point(as_content_mask).getbbox()
+
+
+'''
+Groups close pixels together (x axis)
+'''
+def group_pixels(row, max_dist_tolerated, threshold):
+    # Returns a list of (start, end) column pairs (end inclusive), one per group of pixels
+    # in 'row' (a NumPy array) whose value is <= threshold. A pixel joins the current group
+    # when it is at most 'max_dist_tolerated' columns after that group's last pixel.
+    groups = []
+    group_start = None
+    group_end = None
+    for x in np.where(row <= threshold)[0]:
+        if group_start is None:
+            group_start = group_end = x
+        elif x - group_end <= max_dist_tolerated:
+            group_end = x
+        else:
+            # Gap too wide: close the current group and open the next one AT this pixel.
+            # (Resetting to "no group" here would silently drop the pixel that follows a gap.)
+            groups.append((group_start, group_end))
+            group_start = group_end = x
+
+    if group_start is not None:
+        groups.append((group_start, group_end))
+
+    return groups
+
+
+def box_intersect(box1, box2, max_dist):
+    # Boxes are (left, right, top, bottom); True when they overlap or their gap is within max_dist (x, y).
+    apart_in_x = box2[0]-max_dist[0] > box1[1] or box2[1]+max_dist[0] < box1[0]
+    apart_in_y = box2[2]-max_dist[1] > box1[3] or box2[3]+max_dist[1] < box1[2]
+    return not (apart_in_x or apart_in_y)
+
+'''
+Merge close bounding boxes (left, right, top, bot) whose distance is within the threshold defined in
+'max_dist_tolerated' (x, y). Boxes closer than 'max_dist_tolerated' (Chebyshev distance) are merged into one.
+'''
+def merge_boxes(boxes, max_dist_tolerated):
+    # Fuses boxes (left, right, top, bottom) that box_intersect() reports as overlapping or lying
+    # within 'max_dist_tolerated' (x, y) of each other, repeating until no such pair is left.
+    # Returns the surviving boxes; the input is returned untouched when nothing gets merged.
+    pivot = 0
+    while pivot < len(boxes)-1:
+        anchor = boxes[pivot]
+        candidates = boxes[pivot+1:]
+        is_near = [box_intersect(anchor, other, max_dist_tolerated) for other in candidates]
+
+        if not any(is_near):
+            # Nothing after the anchor is close to it: move on to the next box.
+            pivot += 1
+            continue
+
+        cluster = np.array([anchor, *(box for box, near in zip(candidates, is_near) if near)])
+        leftovers = [box for box, near in zip(candidates, is_near) if not near]
+        leftovers.append(np.array([
+            cluster[:,0].min(),
+            cluster[:,1].max(),
+            cluster[:,2].min(),
+            cluster[:,3].max()
+        ]))
+        # The merged box may now reach boxes that were skipped earlier, so rescan from the start.
+        boxes = np.concatenate([boxes[:pivot], leftovers])
+        pivot = 0
+    return boxes
+
+
+def threshold_from_power(power):
+    return 240 - 64 * power  # gray-level cutoff: 240 at power 0, lowered by 64 per unit of power
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ packaging>=23.2
 mozjpeg-lossless-optimization>=1.1.2
 natsort[fast]>=8.4.0
 distro>=1.8.0
+numpy>=1.22.4,<2.0.0
--- a/setup.py
+++ b/setup.py
@@ -83,6 +83,7 @@ setuptools.setup(
        'mozjpeg-lossless-optimization>=1.1.2',
        'natsort[fast]>=8.4.0',
        'distro',
+        'numpy>=1.22.4,<2.0.0'
    ],
    classifiers=[],
    zip_safe=False,