Coordinates system

The size of a page of a document can be obtained in the document object information.

There are two sizes, each in the format [width, height]: Size of the PDF page:
document.pages[i]['size']

Size of the PDF page as converted by the app. The document's character bounding boxes and annotations are based on this size:

document.pages[i]['original_size']

E.g:
size = [1522, 1970]
original_size = [612.0, 792.0]

The coordinate system used has its origin in the bottom-left corner of the page.

coordinates_system

To visualize the character bounding boxes of a document, we can resize the page image to the size on which they are based. The following code can be used for this:

# Draw the character bounding boxes of page `i` on top of the page image.
# Expects `document` and the page index `i` to be defined, and the names
# `Image` (Pillow), `cv2` (OpenCV), `np` (NumPy) and `plt` (matplotlib.pyplot)
# to be in scope.
doc_bbox = document.get_bbox()
image_path = document.image_paths[i]

size = document.pages[i]['size']
original_size = document.pages[i]['original_size']

# Scale factors that map `size` onto `original_size`, i.e. onto the size the
# character bounding boxes are drawn against below.
factor_x = original_size[0] / size[0]
factor_y = original_size[1] / size[1]

image = Image.open(image_path).convert('RGB')
# Image.LANCZOS is the same filter as Image.ANTIALIAS; the latter was removed
# in Pillow 10, while Image.LANCZOS works on old and new versions alike.
target_size = (int(image.size[0] * factor_x), int(image.size[1] * factor_y))
image.thumbnail(target_size, Image.LANCZOS)
height = image.size[1]

# `page_number` is compared against `i + 1`, so it is 1-based while `i` is 0-based.
image_characters_bbox = [char_bbox for char_bbox in doc_bbox.values() if char_bbox["page_number"] - 1 == i]

# Convert to a NumPy array once instead of copying the image for every box.
canvas = np.array(image)
for bbox in image_characters_bbox:
    # The boxes have their origin at the bottom-left, the image at the
    # top-left, so y is flipped by subtracting it from the image height.
    pp1 = (int(bbox["x0"]), int(height - bbox["y0"]))
    pp2 = (int(bbox["x1"]), int(height - bbox["y1"]))
    canvas = cv2.rectangle(canvas, pp1, pp2, (0, 255, 0), 1)

plt.imshow(canvas)
plt.show()

# Note: cv2 has the origin of its coordinate system in the upper-left corner. Therefore, for visualization, the y coordinates
# are subtracted from the height of the image.

characters_bboxes

The coordinates obtained from the segmentation endpoint of the API are based on an image scaled with a factor = 72/300 (same for x and y). To visualize the segmentation bounding boxes of a document, we can resize the image to the size on which they are based.

# Draw the segmentation bounding boxes of page `i` on top of the page image.
# Expects `document`, the page index `i`, `project_id` and
# `get_results_from_segmentation` to be defined, and the names `Image`
# (Pillow), `cv2` (OpenCV), `np` (NumPy) and `plt` (matplotlib.pyplot) to be
# in scope.
image_path = document.image_paths[i]

# Segmentation coordinates are based on the image scaled by 72/300 in x and y.
factor = 72 / 300

image = Image.open(image_path).convert('RGB')
# Image.LANCZOS is the same filter as Image.ANTIALIAS; the latter was removed
# in Pillow 10, while Image.LANCZOS works on old and new versions alike.
target_size = (int(image.size[0] * factor), int(image.size[1] * factor))
image.thumbnail(target_size, Image.LANCZOS)
height = image.size[1]

image_segmentation_bboxes = get_results_from_segmentation(document.id, project_id)
image_segmentation_bboxes = [segm_bbox for segm_bbox in image_segmentation_bboxes if segm_bbox["page_index"] == i]

# Convert to a NumPy array once instead of copying the image for every box.
canvas = np.array(image)
for bbox in image_segmentation_bboxes:
    # The boxes have their origin at the bottom-left, the image at the
    # top-left, so y is flipped by subtracting it from the image height.
    pp1 = (int(bbox["x0"]), int(height - bbox["y0"]))
    pp2 = (int(bbox["x1"]), int(height - bbox["y1"]))
    canvas = cv2.rectangle(canvas, pp1, pp2, (255, 0, 0), 1)

plt.imshow(canvas)
plt.show()

segmentation_bboxes

To visualize both at the same time, we can convert the coordinates from the segmentation result to be based on the image size used for the character bounding boxes.

# Draw both the character bounding boxes (green) and the segmentation
# bounding boxes (red) of page `i` on the same image.
# Expects `document`, the page index `i`, `project_id` and
# `get_results_from_segmentation` to be defined, and the names `Image`
# (Pillow), `cv2` (OpenCV), `np` (NumPy) and `plt` (matplotlib.pyplot) to be
# in scope.
doc_bbox = document.get_bbox()
image_path = document.image_paths[i]

# Looked up here so the snippet does not depend on variables from an earlier one.
size = document.pages[i]['size']
original_size = document.pages[i]['original_size']

# Scale factors that map `size` onto `original_size`, i.e. onto the size the
# character bounding boxes are drawn against below.
factor_x = original_size[0] / size[0]
factor_y = original_size[1] / size[1]

image = Image.open(image_path).convert('RGB')
# Untouched copy, used only to derive the size of the segmentation image.
image_factor = image.copy()

# Image.LANCZOS is the same filter as Image.ANTIALIAS; the latter was removed
# in Pillow 10, while Image.LANCZOS works on old and new versions alike.
target_size = (int(image.size[0] * factor_x), int(image.size[1] * factor_y))
image.thumbnail(target_size, Image.LANCZOS)
height = image.size[1]

# `page_number` is compared against `i + 1`, so it is 1-based while `i` is 0-based.
image_characters_bbox = [char_bbox for char_bbox in doc_bbox.values() if char_bbox["page_number"] - 1 == i]

# Convert to a NumPy array once instead of copying the image for every box.
canvas = np.array(image)
for bbox in image_characters_bbox:
    # The boxes have their origin at the bottom-left, the image at the
    # top-left, so y is flipped by subtracting it from the image height.
    pp1 = (int(bbox["x0"]), int(height - bbox["y0"]))
    pp2 = (int(bbox["x1"]), int(height - bbox["y1"]))
    canvas = cv2.rectangle(canvas, pp1, pp2, (0, 255, 0), 1)

# Segmentation coordinates are based on the image scaled by 72/300 in x and y.
factor = 72 / 300
segm_target_size = (int(image_factor.size[0] * factor), int(image_factor.size[1] * factor))
image_factor.thumbnail(segm_target_size, Image.LANCZOS)
height_image_factor = image_factor.size[1]

# Factors that map segmentation-image coordinates onto `original_size`.
factor_segm_x = original_size[0] / image_factor.size[0]
factor_segm_y = original_size[1] / image_factor.size[1]

image_segmentation_bboxes = get_results_from_segmentation(document.id, project_id)
image_segmentation_bboxes = [segm_bbox for segm_bbox in image_segmentation_bboxes if segm_bbox["page_index"] == i]

for bbox in image_segmentation_bboxes:
    # Flip y within the segmentation image first, then rescale to `original_size`.
    pp1 = (int(bbox["x0"] * factor_segm_x), int((height_image_factor - bbox["y0"]) * factor_segm_y))
    pp2 = (int(bbox["x1"] * factor_segm_x), int((height_image_factor - bbox["y1"]) * factor_segm_y))
    canvas = cv2.rectangle(canvas, pp1, pp2, (255, 0, 0), 1)

plt.imshow(canvas)
plt.show()

characters_and_segmentation_bboxes

results matching ""

    No results matching ""