add/remove blank lines for PEP 8 + add BB_GRADEBOOKS_DIR and BB_SUBMISSIONS_DIR to settings.py to allow easy changing of directory names
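The scripts now build their paths from these shared constants instead of hard-coded directory strings, so renaming the directories only requires editing utils/settings.py. A minimal usage sketch of the intended pattern (not part of the diff; 'AssignmentX' is just the illustrative assignment name used in the scripts' usage messages):

    import os
    from utils.settings import BB_GRADEBOOKS_DIR, BB_SUBMISSIONS_DIR

    # paths assembled from the settings constants rather than literal strings
    gradebook_dir = os.path.join(BB_GRADEBOOKS_DIR, 'AssignmentX')    # e.g. BB_gradebooks/AssignmentX
    submissions_dir = os.path.join(BB_SUBMISSIONS_DIR, 'AssignmentX')  # e.g. BB_submissions/AssignmentX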

2024-02-26 20:48:45 +00:00
parent bf7aaa12f2
commit 08ffefa798
7 changed files with 28 additions and 30 deletions

View File

@@ -1,20 +1,20 @@
 import os, sys
 from utils.inspector import generate_hashes_gradebook, generate_duplicate_hashes_gradebook
+from utils.settings import BB_GRADEBOOKS_DIR
 def main():
     gradebook_dir_name = ' '.join(sys.argv[1:]) if len(sys.argv) > 1 else exit(f'\nNo gradebook directory name given. Provide the name as an argument.\n\nUsage: python {sys.argv[0]} [gradebook dir name]\nExample: python {sys.argv[0]} AssignmentX\n')
-    gradebook_dir_path = os.path.join('BB_gradebooks', gradebook_dir_name)
+    gradebook_dir_path = os.path.join(BB_GRADEBOOKS_DIR, gradebook_dir_name)
     if not os.path.exists(gradebook_dir_path):
         exit('[Info] Gradebook directory does not exist - nothing to inspect')
     if not os.listdir(gradebook_dir_path): # if no files in gradebook dir
         exit(f'[Info] No files found in this gradebook - nothing to inspect')
-    # generate CSV file with hashes for all files in gradebook & return path to CSV file for finding duplicate hashes
-    hashes_csv_file_path = generate_hashes_gradebook(gradebook_dir_path)
-    # generate CSV file with files having duplicate hashes
-    generate_duplicate_hashes_gradebook(hashes_csv_file_path)
+    hashes_csv_file_path = generate_hashes_gradebook(gradebook_dir_path) # generate CSV file with hashes for all files in gradebook & return path to CSV file for finding duplicate hashes
+    generate_duplicate_hashes_gradebook(hashes_csv_file_path) # generate CSV file with files having duplicate hashes
 if __name__ == '__main__':
     main()

View File

@@ -1,19 +1,19 @@
 import os, sys
 from utils.inspector import generate_hashes_submissions, generate_duplicate_hashes_submissions
+from utils.settings import BB_SUBMISSIONS_DIR
 def main():
     submissions_dir_name = ' '.join(sys.argv[1:]) if len(sys.argv) > 1 else exit(f'\nNo submissions directory name given. Provide the name as an argument.\n\nUsage: python {sys.argv[0]} [submissions dir name]\nExample: python {sys.argv[0]} AssignmentX\n')
-    submissions_dir_path = os.path.join('BB_submissions', submissions_dir_name)
+    submissions_dir_path = os.path.join(BB_SUBMISSIONS_DIR, submissions_dir_name)
     if not os.path.exists(submissions_dir_path):
         exit('[Info] Directory does not exist - nothing to inspect')
     if not os.listdir(submissions_dir_path): # if no files in dir
         exit(f'[Info] No files found in this submissions directory - nothing to inspect')
-    # generate CSV file with hashes for all files in submissions (except for any 'excluded') & return path to CSV file for finding duplicate hashes
-    hashes_csv_file_path = generate_hashes_submissions(submissions_dir_path)
-    # generate CSV file with files having duplicate hashes
-    generate_duplicate_hashes_submissions(hashes_csv_file_path)
+    hashes_csv_file_path = generate_hashes_submissions(submissions_dir_path) # generate CSV file with hashes for all files in submissions (except for any 'excluded') & return path to CSV file for finding duplicate hashes
+    generate_duplicate_hashes_submissions(hashes_csv_file_path) # generate CSV file with files having duplicate hashes
 if __name__ == '__main__':

View File

@@ -1,11 +1,13 @@
 import os, sys
 from utils.organiser import organise_gradebook, check_submissions_dir_for_compressed
+from utils.settings import BB_GRADEBOOKS_DIR, BB_SUBMISSIONS_DIR
 def main():
     gradebook_name = ' '.join(sys.argv[1:]) if len(sys.argv) > 1 else exit(f'\nNo gradebook name given. Provide the name as an argument.\n\nUsage: python {sys.argv[0]} [gradebook dir name]\n')
-    gradebook_dir = os.path.join('BB_gradebooks', gradebook_name) # gradebook from Blackboard with all submissions
-    submissions_dir = os.path.join('BB_submissions', gradebook_name) # target dir for extracted submissions
+    gradebook_dir = os.path.join(BB_GRADEBOOKS_DIR, gradebook_name) # gradebook from Blackboard with all submissions
+    submissions_dir = os.path.join(BB_SUBMISSIONS_DIR, gradebook_name) # target dir for extracted submissions
     abs_path = os.getcwd() # absolute path of main/this script
     print(f'\nGradebook directory to organise:\n{os.path.join(abs_path, gradebook_dir)}', flush=True)
@@ -16,4 +18,3 @@ def main():
 if __name__ == '__main__':
     main()

View File

@@ -4,6 +4,7 @@ from py7zr import SevenZipFile, exceptions
 from utils.settings import BAD_DIR_NAME
 def mark_file_as_BAD(file: str, bad_exception: Exception) -> None:
     try:
         filename = os.path.basename(file)
@@ -73,4 +74,4 @@ def extract_file_to_dir(file_path: str, student_dir: str) -> None | Exception:
     elif file_path.lower().endswith('.7z'):
         extract_7z(file_path, student_dir)
     else:
-        print(f"\n[ERROR] unknown file type: {file_path}\n", flush=True)
+        print(f'\n[ERROR] unknown file type: {file_path}\n', flush=True)

View File

@@ -5,7 +5,7 @@ import hashlib
 import pandas as pd
 from functools import partial
-from utils.settings import CSV_DIR
+from utils.settings import CSV_DIR, BB_GRADEBOOKS_DIR, BB_SUBMISSIONS_DIR
 def load_excluded_filenames(submissions_dir_name: str) -> list[str]: # helper function for hashing all files
@@ -25,7 +25,6 @@ def load_excluded_filenames(submissions_dir_name: str) -> list[str]: # helper f
         print(f'[INFO] Error message: {e}', flush=True)
         return []
 def get_hashes_in_dir(dir_path: str, excluded_filenames: list = []) -> list: # helper function for hashing all files
     hash_list = []
     for subdir, dirs, files in os.walk(dir_path): # loop through all files in the directory and generate hashes
@@ -38,11 +37,10 @@ def get_hashes_in_dir(dir_path: str, excluded_filenames: list = []) -> list: #
             hash_list.append({ 'filepath': filepath, 'filename': filename, 'sha256 hash': filehash})
     return hash_list
 def generate_hashes_gradebook(gradebook_dir_path: str) -> str: # main function for hashing all files in gradebook
     gradebook_dir_name = os.path.abspath(gradebook_dir_path).split(os.path.sep)[-1] # get name of gradebook by separating path and use rightmost part
     if not os.path.isdir(gradebook_dir_path):
-        exit(f'Directory {gradebook_dir_path} does not exist.\nMake sure "{gradebook_dir_name}" exists in "BB_gradebooks".\n')
+        exit(f'Directory {gradebook_dir_path} does not exist.\nMake sure "{gradebook_dir_name}" exists in "{BB_GRADEBOOKS_DIR}".\n')
     dicts_with_hashes_list = get_hashes_in_dir(gradebook_dir_path)
     for hash_dict in dicts_with_hashes_list:
@@ -64,11 +62,10 @@ def generate_hashes_gradebook(gradebook_dir_path: str) -> str: # main function
     print(f'[INFO] Created CSV file with all files & hashes in gradebook: {gradebook_dir_name}\nCSV file: {csv_file_path}', flush=True)
     return csv_file_path
 def generate_hashes_submissions(submissions_dir_path: str) -> str: # main function for hashing all files in submissions
     submissions_dir_name = os.path.abspath(submissions_dir_path).split(os.path.sep)[-1] # get name of submission/assignment by separating path and use rightmost part
     if not os.path.isdir(submissions_dir_path):
-        exit(f'Directory {submissions_dir_path} does not exist.\nMake sure "{submissions_dir_name}" exists in "BB_submissions".\n')
+        exit(f'Directory {submissions_dir_path} does not exist.\nMake sure "{submissions_dir_name}" exists in "{BB_SUBMISSIONS_DIR}".\n')
     excluded_filenames = load_excluded_filenames(submissions_dir_name)
     dicts_with_hashes_list = []
@@ -96,8 +93,7 @@ def generate_hashes_submissions(submissions_dir_path: str) -> str: # main funct
         for student_dict in dicts_with_hashes_list:
             writer.writerows(student_dict)
     print(f'[INFO] Created CSV file with all files & hashes for submissions in: {submissions_dir_name}\nCSV file: {csv_file_path}', flush=True)
     return csv_file_path
 def generate_duplicate_hashes_generic(hashes_csv_file_path: str, drop_columns: list[str]):
     csv = pd.read_csv(hashes_csv_file_path)

View File

@@ -1,10 +1,12 @@
 import os, shutil, re
 from utils.extractor import extract_file_to_dir
-from utils.settings import BAD_DIR_NAME
+from utils.settings import BAD_DIR_NAME, BB_GRADEBOOKS_DIR
 def validate_gradebook_dir_name(src_dir: str) -> None:
     if not os.path.isdir(src_dir): # check if it exists and is a directory
-        print(f"\n[Error] Incorrect directory: {src_dir}\n[Info] Make sure the directory exists in 'BB_gradebooks'")
+        print(f'\n[Error] Incorrect directory: {src_dir}\n[Info] Make sure the directory exists in "{BB_GRADEBOOKS_DIR}"')
         exit()
     if not os.listdir(src_dir): # check if there are any files in the directory
         print(f'\n[Info] No files found in this gradebook - nothing to organise')
@@ -13,7 +15,6 @@ def validate_gradebook_dir_name(src_dir: str) -> None:
         print(f'\n[Info] Gradebook has only invalid compressed files in: {os.path.join(src_dir, BAD_DIR_NAME)}\n[Info] Nothing to organise')
         exit()
 def get_comment_from_submission_txt(file_path: str) -> str | None:
     no_comment_text = f'Comments:\nThere are no student comments for this assignment.'
     no_comment_text_regex = no_comment_text
@@ -30,7 +31,6 @@ def get_comment_from_submission_txt(file_path: str) -> str | None:
         return comment
     return None
 def get_gradebook_stats(src_dir: str) -> dict[str, int]:
     all_files = [ os.path.join(src_dir, f) for f in os.listdir(src_dir) if BAD_DIR_NAME not in f ]
     dirs = [ f for f in all_files if os.path.isdir(f) and BAD_DIR_NAME not in f ]
@@ -55,7 +55,6 @@ def get_gradebook_stats(src_dir: str) -> dict[str, int]:
     print(msg, flush=True)
     return files_counter
 def organise_file_per_student(src_dir: str, dest_dir: str, file_name: str, student_no: str) -> None:
     student_dir = os.path.join(dest_dir, student_no)
     os.makedirs(student_dir, exist_ok=True) # create student directory if it doesn't exist
@@ -79,7 +78,6 @@ def organise_file_per_student(src_dir: str, dest_dir: str, file_name: str, stude
     new_file_path = os.path.join(student_dir, os.path.basename(file_name))
     shutil.move(file_path, new_file_path) # move the file to student directory
 def organise_gradebook(src_dir: str, dest_dir: str) -> None:
     """1) extracts .zip, .rar, .7z files, organises contents into directories per student number, and deletes compressed files after successful extraction
     2) organises all other files in gradebook into directories per student number
@@ -107,7 +105,6 @@ def organise_gradebook(src_dir: str, dest_dir: str) -> None:
     print(f'[Info] Comments in file: {dest_dir}_comments.txt', flush=True)
     print(f'[Note] Compressed files (.zip, .rar, .7z) are automatically deleted from the gradebook directory after successful extraction', flush=True)
 def check_submissions_dir_for_compressed(submissions_dir: str) -> None:
     """checks if any submitted compressed files contain more compressed files inside (they are not recursively extracted)

View File

@@ -1,4 +1,7 @@
 import os
+BB_GRADEBOOKS_DIR = 'BB_gradebooks' # directory with extracted gradebooks downloaded from Blackboard
+BB_SUBMISSIONS_DIR = 'BB_submissions' # directory with organised gradebook submissions
 BAD_DIR_NAME = '__BAD__' # for organise_gradebook.py - directory with corrupt/invalid compressed files
 CSV_DIR = os.path.join(os.getcwd(), 'csv-inspect') # for inspect_gradebook.py and inspect_submissions.py - output dir for generated CSV files