Sudoku Checker

Starting file: check_sudoku.py.

Tests file: test_check_sudoku.py.

This program accepts a filename for a text file with a 9x9 sudoku and prints ok if the Sudoku is solved and not solved if it is not.

If the given Sudoku file does not have 9 rows and 9 columns or has characters besides the numbers 1 through 9, then an error is printed.

from argparse import ArgumentParser, FileType


def check_unique_num_list(list):
    is_unique = False
    for i in list:
        if list.count(i) == 1:
            is_unique = True
        else:
            is_unique = False
            break
    return is_unique


def check_vertical_line(column):
    a = []
    for i in range(9):
        a.append(sudoku_matrix[i][column])
    return check_unique_num_list(a)


def get_square(a, b, c, d):
    square = []
    for j in range(a, b):
        for i in range(c, d):
            square.append(sudoku_matrix[j][i])
    return(check_unique_num_list(square))


def check_all_horizontal_numbers():
    is_unique = False
    for i in range(9):
        if check_unique_num_list(sudoku_matrix[i]):
            is_unique = True
        else:
            is_unique = False
            break
    return is_unique


def check_all_vertical_numbers():
    is_unique = False
    for i in range(9):
        if check_vertical_line(i):
            is_unique = True
        else:
            is_unique = False
            break
    return is_unique


def check_all_squares():
    squares = {
         0: get_square(0, 3, 0, 3),
         1: get_square(0, 3, 3, 6),
         2: get_square(0, 3, 6, 9),
         3: get_square(3, 6, 0, 3),
         4: get_square(3, 6, 3, 6),
         5: get_square(3, 6, 6, 9),
         6: get_square(6, 9, 0, 3),
         7: get_square(6, 9, 3, 6),
         8: get_square(6, 9, 6, 9),
    }
    are_all_squares_ok = False
    for i in squares:
        if squares.get(i):
            are_all_squares_ok = True
        else:
            are_all_squares_ok = False
            break
    return are_all_squares_ok


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("sudoku_file", type=FileType("rt"))
    args = parser.parse_args()
    try:
        sudoku_matrix = [
            [int(n) for n in line.split()]
            for line in args.sudoku_file
        ]
    except ValueError as e:
        raise SystemExit(str(e)) from e
    if len(sudoku_matrix) != 9:
        raise SystemExit("File must have exactly 9 rows")
    if {len(row) for row in sudoku_matrix} != {9}:
        raise SystemExit("Each row must have exactly 9 columns")
    if not ({n for row in sudoku_matrix for n in row} <= set(range(1, 10))):
        raise SystemExit("Each column must contain the numbers 1 to 9 only")
    if check_all_horizontal_numbers() and check_all_vertical_numbers() and check_all_squares():
        print("ok")
    else:
        print("not solved")

What could we do to improve this program?

Main

Let’s focus on the logic at the bottom first:

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("sudoku_file", type=FileType("rt"))
    args = parser.parse_args()
    try:
        sudoku_matrix = [
            [int(n) for n in line.split()]
            for line in args.sudoku_file
        ]
    except ValueError as e:
        raise SystemExit(str(e)) from e
    if len(sudoku_matrix) != 9:
        raise SystemExit("File must have exactly 9 rows")
    if {len(row) for row in sudoku_matrix} != {9}:
        raise SystemExit("Each row must have exactly 9 columns")
    if not ({n for row in sudoku_matrix for n in row} <= set(range(1, 10))):
        raise SystemExit("Each column must contain the numbers 1 to 9 only")
    if check_all_horizontal_numbers() and check_all_vertical_numbers() and check_all_squares():
        print("ok")
    else:
        print("not solved")

There’s a lot of logic here related to parsing the Sudoku file. Let’s make a separate function for that:

def sudoku_matrix_from_file(lines):
    try:
        rows = [
            [int(n) for n in line.split()]
            for line in lines
        ]
    except ValueError as e:
        raise SystemExit(str(e)) from e
    if len(sudoku_matrix) != 9:
        raise SystemExit("File must have exactly 9 rows")
    if {len(row) for row in sudoku_matrix} != {9}:
        raise SystemExit("Each row must have exactly 9 columns")
    if not ({n for row in sudoku_matrix for n in row} <= set(range(1, 10))):
        raise SystemExit("Each column must contain the numbers 1 to 9 only")
    return rows

That allows us to shorten our if statement quite a bit:

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("sudoku_file", type=FileType("rt"))
    args = parser.parse_args()
    sudoku_matrix = sudoku_matrix_from_file(args.sudoku_file)
    if check_all_horizontal_numbers() and check_all_vertical_numbers() and check_all_squares():
        print("ok")
    else:
        print("not solved")

What about that long condition? We could make a function to check all 3 of those conditions in that very long if for us:

def check_sudoku():
    return (check_all_horizontal_numbers()
            and check_all_vertical_numbers()
            and check_all_squares())

This shortens our code even more… but you might have noticed something odd with this code:

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("sudoku_file", type=FileType("rt"))
    args = parser.parse_args()
    sudoku_matrix = sudoku_matrix_from_file(args.sudoku_file)
    if check_sudoku():
        print("ok")
    else:
        print("not solved")

We aren’t passing in a Sudoku matrix to check_sudoku or to any of the three functions it calls. Instead, each of those functions relies on our global sudoku_matrix variable.

Let’s update all our functions to accept our matrix as an argument instead!

def check_vertical_line(sudoku_matrix, column):
    ...

def get_square(sudoku_matrix, a, b, c, d):
    ...

def check_all_horizontal_numbers(sudoku_matrix):
    ...

def check_all_vertical_numbers(sudoku_matrix):
    ...

def check_all_squares(sudoku_matrix):
    ...

def check_sudoku(sudoku_matrix):
    return (check_all_horizontal_numbers(sudoku_matrix)
            and check_all_vertical_numbers(sudoku_matrix)
            and check_all_squares(sudoku_matrix))

Then we’ll move our logic into a main function, so we don’t have a global variable with the same name as all those local variables:

def main():
    parser = ArgumentParser()
    parser.add_argument("sudoku_file", type=FileType("rt"))
    args = parser.parse_args()
    matrix = sudoku_matrix_from_file(args.sudoku_file)
    if check_sudoku(matrix):
        print("ok")
    else:
        print("not solved")


if __name__ == "__main__":
    main()

Checking Rows

Let’s refactor this function that checks each of the rows in the matrix:

def check_all_horizontal_numbers(sudoku_matrix):
    is_unique = False
    for i in range(9):
        if check_unique_num_list(sudoku_matrix[i]):
            is_unique = True
        else:
            is_unique = False
            break
    return is_unique

We’re looking through each row by using a counter and indexing the list. Let’s just loop over the matrix instead:

def check_all_horizontal_numbers(sudoku_matrix):
    is_unique = False
    for n in sudoku_matrix:
        if check_unique_num_list(n):
            is_unique = True
        else:
            is_unique = False
            break
    return is_unique

That check we’re doing to make sure each is unique seems pretty similar to what the any and all functions do in Python.

Let’s use the all function with a generator expression here:

def check_all_horizontal_numbers(sudoku_matrix):
    return all(
        check_unique_num_list(n)
        for n in sudoku_matrix
    )

That’s a lot more readable!

What is that check_unique_num_list doing?

Checking For Uniqueness

The check_unique_num_list function returns True if all numbers are unique.

We’re doing this by looping through a given list and counting the number of times we see each item to make sure it’s just once:

def check_unique_num_list(list):
    is_unique = False
    for i in list:
        if list.count(i) == 1:
            is_unique = True
        else:
            is_unique = False
            break
    return is_unique

This also looks like something that could be replaced by all:

def check_unique_num_list(list):
    return all(
        list.count(i) == 1
        for i in list
    )

But there’s probably a better way to do this. We really just need to make sure none of our numbers are duplicated. To do that, we could turn our list into a set (which will remove duplicates) and then make sure the length of the set is the same as our original list:

def check_unique_num_list(list):
    return len(set(list)) == len(list)

That list variable is shadowing the built-in list function. That works, but it’s probably not a great idea to shadow a built-in variable name. Let’s change that to numbers:

def check_unique_num_list(numbers):
    return len(set(numbers)) == len(numbers)

And while we’re renaming, let’s also rename our function to something clearer:

def has_no_duplicates(numbers):
    return len(set(numbers)) == len(numbers)

We’ll also need to change all three of the places where the function is called to use the new name.

Checking Columns

Let’s refactor this function that checks each of the columns in the matrix:

def check_all_vertical_numbers(sudoku_matrix):
    is_unique = False
    for i in range(9):
        if check_vertical_line(sudoku_matrix, i):
            is_unique = True
        else:
            is_unique = False
            break
    return is_unique

This looks like a good use for the all function again:

def check_all_vertical_numbers(sudoku_matrix):
    return all(
        check_vertical_line(sudoku_matrix, i)
        for i in range(9)
    )

We can’t currently avoid indexing because we’re using a function that relies on the index of our column.

Let’s look at that check_vertical_line function:

def check_vertical_line(sudoku_matrix, column):
    a = []
    for i in range(9):
        a.append(sudoku_matrix[i][column])
    return has_no_duplicates(a)

We could refactor this function to use a list comprehension and to loop over rows directly instead of using that i index:

def check_vertical_line(sudoku_matrix, column):
    return has_no_duplicates([
        row[column]
        for row in sudoku_matrix:
    ])

We’re using that index to extract each column from our matrix. But what if we transposed our list-of-lists, to make the columns into rows and the rows into columns? We could make a function that does that by using Python’s zip function along with * to unpack all rows into zip:

def transpose(matrix):
    return zip(*matrix)

Now our check_all_vertical_numbers function could do essentially the same thing as check_all_horizontal_numbers, except that it would loop over the columns by using transpose:

def check_all_vertical_numbers(sudoku_matrix):
    return all(
        has_no_duplicates(column)
        for column in transpose(sudoku_matrix)
    )

We can delete our check_vertical_line helper function now.

Checking 3x3 Squares

Let’s take a look at the check_all_squares and get_square functions now:

def get_square(sudoku_matrix, a, b, c, d):
    square = []
    for j in range(a, b):
        for i in range(c, d):
            square.append(sudoku_matrix[j][i])
    return(has_no_duplicates(square))

def check_all_squares(sudoku_matrix):
    squares = {
         0: get_square(sudoku_matrix, 0, 3, 0, 3),
         1: get_square(sudoku_matrix, 0, 3, 3, 6),
         2: get_square(sudoku_matrix, 0, 3, 6, 9),
         3: get_square(sudoku_matrix, 3, 6, 0, 3),
         4: get_square(sudoku_matrix, 3, 6, 3, 6),
         5: get_square(sudoku_matrix, 3, 6, 6, 9),
         6: get_square(sudoku_matrix, 6, 9, 0, 3),
         7: get_square(sudoku_matrix, 6, 9, 3, 6),
         8: get_square(sudoku_matrix, 6, 9, 6, 9),
    }
    are_all_squares_ok = False
    for i in squares:
        if squares.get(i):
            are_all_squares_ok = True
        else:
            are_all_squares_ok = False
            break
    return are_all_squares_ok

That check_all_squares function looks like it could use the all function:

def check_all_squares(sudoku_matrix):
    squares = {
         0: get_square(sudoku_matrix, 0, 3, 0, 3),
         1: get_square(sudoku_matrix, 0, 3, 3, 6),
         2: get_square(sudoku_matrix, 0, 3, 6, 9),
         3: get_square(sudoku_matrix, 3, 6, 0, 3),
         4: get_square(sudoku_matrix, 3, 6, 3, 6),
         5: get_square(sudoku_matrix, 3, 6, 6, 9),
         6: get_square(sudoku_matrix, 6, 9, 0, 3),
         7: get_square(sudoku_matrix, 6, 9, 3, 6),
         8: get_square(sudoku_matrix, 6, 9, 6, 9),
    }
    return all(
        squares.get(i)
        for i in squares
    )

It seems a little odd that we’re using a dictionary for those squares results though. We’re essentially just looping over each value.

Let’s use a list instead:

def check_all_squares(sudoku_matrix):
    squares = [
         get_square(sudoku_matrix, 0, 3, 0, 3),
         get_square(sudoku_matrix, 0, 3, 3, 6),
         get_square(sudoku_matrix, 0, 3, 6, 9),
         get_square(sudoku_matrix, 3, 6, 0, 3),
         get_square(sudoku_matrix, 3, 6, 3, 6),
         get_square(sudoku_matrix, 3, 6, 6, 9),
         get_square(sudoku_matrix, 6, 9, 0, 3),
         get_square(sudoku_matrix, 6, 9, 3, 6),
         get_square(sudoku_matrix, 6, 9, 6, 9),
    ]
    return all(squares)

It seems like our get_square function might be doing more than its name implies. It’s not just “getting” a square but actually checking whether that square has any duplicates. Let’s move the duplicate checking logic from get_square into check_all_squares:

def get_square(sudoku_matrix, a, b, c, d):
    square = []
    for j in range(a, b):
        for i in range(c, d):
            square.append(sudoku_matrix[j][i])
    return square

def check_all_squares(sudoku_matrix):
    squares = [
         get_square(sudoku_matrix, 0, 3, 0, 3),
         get_square(sudoku_matrix, 0, 3, 3, 6),
         get_square(sudoku_matrix, 0, 3, 6, 9),
         get_square(sudoku_matrix, 3, 6, 0, 3),
         get_square(sudoku_matrix, 3, 6, 3, 6),
         get_square(sudoku_matrix, 3, 6, 6, 9),
         get_square(sudoku_matrix, 6, 9, 0, 3),
         get_square(sudoku_matrix, 6, 9, 3, 6),
         get_square(sudoku_matrix, 6, 9, 6, 9),
    ]
    return all(
        has_no_duplicates(square)
        for square in squares
    )

We could also consider generating those get_square calls programmatically:

def check_all_squares(sudoku_matrix):
    """Return True if the numbers in each 3x3 Sudoku square are unique."""
    squares = [
        get_square(sudoku_matrix, i, i+3, j, j+3)
        for i in range(0, 9, 3)
        for j in range(0, 9, 3)
    ]
    return all(
        has_no_duplicates(square)
        for square in squares
    )

Our code was more explicit before, but it also took a bit of thinking to figure out what it was doing. This makes our algorithm more explicit but makes the specifics of each individual square more implicit.

Now let’s focus on that get_square function a bit more. We’re doing a lot of indexing there. We’re trying to get items between the row indexes a and b and between the column indexes c and d. Could we use slicing for that instead?

def get_square(sudoku_matrix, a, b, c, d):
    square = []
    for row in sudoku_matrix[a:b]:
        for n in row[c:d]:
            square.append(n)
    return square

It seems a little more apparent now that we’re getting items between particular indexes (after all, that is specifically what slicing is for).

That loop also looks like something we could refactor into a comprehension.

def get_square(sudoku_matrix, a, b, c, d):
    square = [
        n
        for row in sudoku_matrix[a:b]
        for n in row[c:d]
    ]
    return square

And we could remove that unnecessary square variable as well:

def get_square(sudoku_matrix, a, b, c, d):
    return [
        n
        for row in sudoku_matrix[a:b]
        for n in row[c:d]
    ]

Documentation

Let’s add helpful docstrings to each of these functions:

"""Script to verify whether a given 9x9 Sudoku text file is valid/solved."""
from argparse import ArgumentParser, FileType


def transpose(matrix):
    """Transpose the given matrix (columns become rows)."""
    ...


def has_no_duplicates(numbers):
    """Return True if all the iterable's items are unique."""
    ...


def get_square(sudoku_matrix, a, b, c, d):
    """Return all numbers between rows a & b and columns c & d."""
    ...


def check_all_horizontal_numbers(sudoku_matrix):
    """Return True if the numbers in each row are unique."""
    ...


def check_all_vertical_numbers(sudoku_matrix):
    """Return True if the numbers in each column are unique."""
    ...


def check_all_squares(sudoku_matrix):
    """Return True if the numbers in each 3x3 Sudoku square are unique."""
    ...


def sudoku_matrix_from_file(lines):
    """
    Return a 9x9 list-of-lists of numbers from the given file.

    Raises a SystemExit exception if the file format is invalid.
    """
    ...


def check_sudoku(sudoku_matrix):
    """Return True if the given list-of-lists represents a solved Sudoku."""
    ...

Fully Refactored

Here’s a fully refactored version.

The only additional thing we’ve changed is that our sudoku_matrix_from_file function raises ValueError exceptions because raising a SystemExit exception feels a bit odd from a function that isn’t the main function.

Our main function catches any raised ValueError exceptions and passes them to sys.exit, which does the same thing as raising a SystemExit exception (it prints to standard error while exiting with a positive error code).

"""Script to verify whether a given 9x9 Sudoku text file is valid/solved."""
from argparse import ArgumentParser, FileType
import sys


def transpose(matrix):
    """Transpose the given matrix (columns become rows)."""
    return zip(*matrix)


def has_no_duplicates(numbers):
    """Return True if all the iterable's items are unique."""
    return len(set(numbers)) == len(numbers)


def get_square(sudoku_matrix, a, b, c, d):
    """Return all numbers between rows a & b and columns c & d."""
    return [
        n
        for row in sudoku_matrix[a:b]
        for n in row[c:d]
    ]


def check_all_horizontal_numbers(sudoku_matrix):
    """Return True if the numbers in each row are unique."""
    return all(
        has_no_duplicates(n)
        for n in sudoku_matrix
    )


def check_all_vertical_numbers(sudoku_matrix):
    """Return True if the numbers in each column are unique."""
    return all(
        has_no_duplicates(column)
        for column in transpose(sudoku_matrix)
    )


def check_all_squares(sudoku_matrix):
    """Return True if the numbers in each 3x3 Sudoku square are unique."""
    squares = [
        get_square(sudoku_matrix, i, i+3, j, j+3)
        for i in range(0, 9, 3)
        for j in range(0, 9, 3)
    ]
    return all(
        has_no_duplicates(square)
        for square in squares
    )


def sudoku_matrix_from_file(lines):
    """
    Return a 9x9 list-of-lists of numbers from the given file.

    Raises a ValueError exception if the file format is invalid.
    """
        rows = [
            [int(n) for n in line.split()]
            for line in lines
        ]
    if len(sudoku_matrix) != 9:
        raise ValueError("File must have exactly 9 rows")
    if {len(row) for row in sudoku_matrix} != {9}:
        raise ValueError("Each row must have exactly 9 columns")
    if not ({n for row in sudoku_matrix for n in row} <= set(range(1, 10))):
        raise ValueError("Each column must contain the numbers 1 to 9 only")
    return rows


def check_sudoku(sudoku_matrix):
    """Return True if the given list-of-lists represents a solved Sudoku."""
    return (check_all_horizontal_numbers(sudoku_matrix)
            and check_all_vertical_numbers(sudoku_matrix)
            and check_all_squares(sudoku_matrix))


def main():
    parser = ArgumentParser()
    parser.add_argument("sudoku_file", type=FileType("rt"))
    args = parser.parse_args()
    try:
        matrix = sudoku_matrix_from_file(args.sudoku_file)
    except ValueError as e:
        sys.exit(e)
    if check_sudoku(matrix):
        print("ok")
    else:
        print("not solved")


if __name__ == "__main__":
    main()

What About a Class?

Pretty much all of these functions accept the same first argument: our list-of-list of numbers.

Whenever you have many functions that accept the same arguments (or share many common arguments), you might want to ask “would a class be helpful here instead?”

Here’s a version of this program that uses a custom list-like class (using UserList from Python’s collections module):

from argparse import ArgumentParser, FileType
from collections import UserList
import sys


class SudokuGrid(UserList):
    """Class representing 9x9 sudoku grid."""

    def __init__(self, data):
        """Validate row/col counts and ensure only 1-9 present."""
        super().__init__(data)
        if len(self) != 9:
            raise ValueError("File must have exactly 9 rows")
        if {len(row) for row in self} != {9}:
            raise ValueError("Each row must have exactly 9 columns")
        if not ({n for row in self for n in row} <= set(range(1, 10))):
            raise ValueError("Each row must contain the numbers 1 to 9 only")

    @classmethod
    def from_file(cls, sudoku_file):
        return cls(
            [int(n) for n in line.split()]
            for line in sudoku_file
        )

    def is_solved(self):
        """Return True if sudoku is fully valid."""
        return (
            self.rows_are_valid()
            and self.columns_are_valid()
            and self.squares_are_valid()
        )

    def rows_are_valid(self):
        """Return True if each row is valid."""
        return all(
            has_no_duplicates(row)
            for row in self
        )

    def columns_are_valid(self):
        """Return True if each column is valid."""
        return all(
            has_no_duplicates(column)
            for column in transpose(self)
        )

    def squares_are_valid(self):
        """Return True if each 3x3 sub-square is valid."""
        return all(
            has_no_duplicates(square)
            for square in self._create_all_squares()
        )

    def _create_all_squares(self):
        """Return all nine 3x3 squares."""
        return [
            self._get_square(x, y)
            for x in range(0, 9, 3)
            for y in range(0, 9, 3)
        ]

    def _get_square(self, x, y):
        """Return 3x3 square starting from position (x, y)."""
        return [
            n
            for row in self.data[x:x+3]
            for n in row[y:y+3]
        ]


def transpose(matrix):
    """Transpose the given matrix (columns become rows)."""
    return zip(*matrix)


def has_no_duplicates(numbers):
    """Return True if given iterable has no duplicates."""
    return len(set(numbers)) == len(numbers)


def main():
    parser = ArgumentParser()
    parser.add_argument("sudoku_file", type=FileType("rt"))
    args = parser.parse_args()
    try:
        sudoku = SudokuGrid.from_file(args.sudoku_file)
    except ValueError as e:
        sys.exit(e)
    print("ok" if sudoku.is_solved() else "not solved")


if __name__ == "__main__":
    main()

This isn’t necessarily better than our function-based approach, but it isn’t an interesting solution. The method names are a bit nicer than the function names we used before and our _-prefixed helper methods should maybe be _-prefixed helper functions in our other version.

Which do you prefer?

Tests

You can run the tests for this exercise with:

$ python test_check_sudoku.py

The test file includes several test cases to verify the sudoku checker works correctly:

  • test_solved_sudoku: Tests that a correctly solved sudoku returns “ok”

  • test_unsolved_sudoku: Tests that an invalid sudoku returns “not solved”

  • test_invalid_file: Tests that invalid file formats raise SystemExit

  • test_missing_file: Tests that missing files are handled properly

The tests use a sophisticated test runner that creates temporary sudoku files and captures program output to verify behavior. This allows testing the command-line interface without manually creating files.