"""
:Author: James Sherratt
:Date: 20/10/2019
:License: MIT

:name: heapsort.py

Heap sorts a list-like object in place, using a max heap stored in the list
itself. Note: this has been written with code-clarity in mind first,
efficiency second.

This module is the final state of a four-part patch series ("Add heapsort",
"Better formatting", "Better documentation", "More space corrections.").
"""

from random import randint


def get_left(i):
    """
    Get the index of the left child of a heap node stored in an array.

    :param i: The parent index.
    :return: the index of the left child, ``2 * i + 1``.
    """
    return 2 * i + 1


def get_right(i):
    """
    Get the index of the right child of a heap node stored in an array.

    :param i: The parent index.
    :return: the index of the right child, ``2 * i + 2``.
    """
    return 2 * i + 2


def repair_heap(vals_list, root, arr_top):
    """
    Sift the value at ``root`` down to its correct position, restoring the
    max heap property for the sub-tree rooted there. This assumes both child
    sub-trees of the root node are already max heaps.

    :param vals_list: list of values, which represents a heap structure.
    :param root: the index of the node to sift down.
    :param arr_top: exclusive upper bound of the heap; only indices strictly
        below ``arr_top`` are treated as part of the heap.
    :return: Reference to the passed list (modified in place).
    """
    left = get_left(root)
    # A node without a left child is a leaf, so there is nothing left to fix.
    while left < arr_top:
        # Find the largest of the root and its (one or two) children. Only
        # "<" is used so that any type defining __lt__ can be sorted.
        largest = root
        if vals_list[largest] < vals_list[left]:
            largest = left
        right = get_right(root)
        # The right child may not exist even when the left one does.
        if right < arr_top:
            if vals_list[largest] < vals_list[right]:
                largest = right
        # The root already dominates its children: the heap is repaired.
        if largest == root:
            break
        vals_list[root], vals_list[largest] = vals_list[largest], vals_list[root]
        # Keep following the value we just moved down.
        root = largest
        left = get_left(root)

    return vals_list


def max_heap(vals_list):
    """
    Convert a list of values into a max heap, in place.

    :param vals_list: list of mutually comparable values.
    :return: the same list, rearranged as a max heap.
    """
    len_list = len(vals_list)
    # Leaves are trivially heaps, so start from the last node that has a
    # child (index len // 2 - 1) and repair each sub-tree up to the root.
    for root in range(len_list // 2 - 1, -1, -1):
        repair_heap(vals_list, root, len_list)

    return vals_list


def max_heap_to_sorted(vals_list):
    """
    Convert a max heap list into an ascending sorted list, in place.

    :param vals_list: list containing a max heap.
    :return: the same list of values, sorted.
    """
    # top is the index of the last element of the part of the list that is
    # still a heap; everything after it is already in its final position.
    for top in range(len(vals_list) - 1, 0, -1):
        # Move the current maximum (the heap root) to the end of the heap.
        vals_list[0], vals_list[top] = vals_list[top], vals_list[0]
        # Sift the new root down within the shrunken heap. Passing the bound
        # instead of a slice avoids copying the list on every iteration.
        repair_heap(vals_list, 0, top)

    return vals_list


def heapsort(vals_list):
    """
    Sort a list of values in place using heapsort.

    :param vals_list: list of sortable values.
    :return: the same list, sorted in ascending order.
    """
    max_heap(vals_list)
    return max_heap_to_sorted(vals_list)


def main():
    """
    Demo: heap sort a large random list and compare against ``sorted``.
    """
    list_len = 100000
    vals_list = [randint(0, (2**16)) for _ in range(list_len)]
    # Sort a copy so the original stays available for the reference sort.
    heap_sorted = heapsort(list(vals_list))
    py_sorted = sorted(vals_list)
    print("Did the sort work? {}".format(heap_sorted == py_sorted))


if __name__ == "__main__":
    main()