add/remove blank lines for PEP 8 + add BB_GRADEBOOKS_DIR and BB_SUBMISSIONS_DIR to settings.py to allow easy changing of directory names
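The scripts now build their paths from these shared constants instead of hard-coded directory strings, so renaming the directories only requires editing utils/settings.py. A minimal usage sketch of the intended pattern (not part of the diff; 'AssignmentX' is just the illustrative assignment name used in the scripts' usage messages):

    import os
    from utils.settings import BB_GRADEBOOKS_DIR, BB_SUBMISSIONS_DIR

    # paths assembled from the settings constants rather than literal strings
    gradebook_dir = os.path.join(BB_GRADEBOOKS_DIR, 'AssignmentX')    # e.g. BB_gradebooks/AssignmentX
    submissions_dir = os.path.join(BB_SUBMISSIONS_DIR, 'AssignmentX')  # e.g. BB_submissions/AssignmentX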

2024-02-26 20:48:45 +00:00
parent bf7aaa12f2
commit 08ffefa798
7 changed files with 28 additions and 30 deletions

View File

@@ -1,20 +1,20 @@
 import os, sys
 from utils.inspector import generate_hashes_gradebook, generate_duplicate_hashes_gradebook
+from utils.settings import BB_GRADEBOOKS_DIR
 def main():
     gradebook_dir_name = ' '.join(sys.argv[1:]) if len(sys.argv) > 1 else exit(f'\nNo gradebook directory name given. Provide the name as an argument.\n\nUsage: python {sys.argv[0]} [gradebook dir name]\nExample: python {sys.argv[0]} AssignmentX\n')
-    gradebook_dir_path = os.path.join('BB_gradebooks', gradebook_dir_name)
+    gradebook_dir_path = os.path.join(BB_GRADEBOOKS_DIR, gradebook_dir_name)
     if not os.path.exists(gradebook_dir_path):
         exit('[Info] Gradebook directory does not exist - nothing to inspect')
     if not os.listdir(gradebook_dir_path): # if no files in gradebook dir
         exit(f'[Info] No files found in this gradebook - nothing to inspect')
-    # generate CSV file with hashes for all files in gradebook & return path to CSV file for finding duplicate hashes
-    hashes_csv_file_path = generate_hashes_gradebook(gradebook_dir_path)
-    # generate CSV file with files having duplicate hashes
-    generate_duplicate_hashes_gradebook(hashes_csv_file_path)
+    hashes_csv_file_path = generate_hashes_gradebook(gradebook_dir_path) # generate CSV file with hashes for all files in gradebook & return path to CSV file for finding duplicate hashes
+    generate_duplicate_hashes_gradebook(hashes_csv_file_path) # generate CSV file with files having duplicate hashes
 if __name__ == '__main__':
     main()

View File

@@ -1,19 +1,19 @@
 import os, sys
 from utils.inspector import generate_hashes_submissions, generate_duplicate_hashes_submissions
+from utils.settings import BB_SUBMISSIONS_DIR
 def main():
     submissions_dir_name = ' '.join(sys.argv[1:]) if len(sys.argv) > 1 else exit(f'\nNo submissions directory name given. Provide the name as an argument.\n\nUsage: python {sys.argv[0]} [submissions dir name]\nExample: python {sys.argv[0]} AssignmentX\n')
-    submissions_dir_path = os.path.join('BB_submissions', submissions_dir_name)
+    submissions_dir_path = os.path.join(BB_SUBMISSIONS_DIR, submissions_dir_name)
     if not os.path.exists(submissions_dir_path):
         exit('[Info] Directory does not exist - nothing to inspect')
     if not os.listdir(submissions_dir_path): # if no files in dir
         exit(f'[Info] No files found in this submissions directory - nothing to inspect')
-    # generate CSV file with hashes for all files in submissions (except for any 'excluded') & return path to CSV file for finding duplicate hashes
-    hashes_csv_file_path = generate_hashes_submissions(submissions_dir_path)
-    # generate CSV file with files having duplicate hashes
-    generate_duplicate_hashes_submissions(hashes_csv_file_path)
+    hashes_csv_file_path = generate_hashes_submissions(submissions_dir_path) # generate CSV file with hashes for all files in submissions (except for any 'excluded') & return path to CSV file for finding duplicate hashes
+    generate_duplicate_hashes_submissions(hashes_csv_file_path) # generate CSV file with files having duplicate hashes
 if __name__ == '__main__':

View File

@@ -1,11 +1,13 @@
 import os, sys
 from utils.organiser import organise_gradebook, check_submissions_dir_for_compressed
+from utils.settings import BB_GRADEBOOKS_DIR, BB_SUBMISSIONS_DIR
 def main():
     gradebook_name = ' '.join(sys.argv[1:]) if len(sys.argv) > 1 else exit(f'\nNo gradebook name given. Provide the name as an argument.\n\nUsage: python {sys.argv[0]} [gradebook dir name]\n')
-    gradebook_dir = os.path.join('BB_gradebooks', gradebook_name) # gradebook from Blackboard with all submissions
-    submissions_dir = os.path.join('BB_submissions', gradebook_name) # target dir for extracted submissions
+    gradebook_dir = os.path.join(BB_GRADEBOOKS_DIR, gradebook_name) # gradebook from Blackboard with all submissions
+    submissions_dir = os.path.join(BB_SUBMISSIONS_DIR, gradebook_name) # target dir for extracted submissions
     abs_path = os.getcwd() # absolute path of main/this script
     print(f'\nGradebook directory to organise:\n{os.path.join(abs_path, gradebook_dir)}', flush=True)
@@ -16,4 +18,3 @@ def main():
 if __name__ == '__main__':
     main()

View File

@@ -4,6 +4,7 @@ from py7zr import SevenZipFile, exceptions
 from utils.settings import BAD_DIR_NAME
 def mark_file_as_BAD(file: str, bad_exception: Exception) -> None:
     try:
         filename = os.path.basename(file)
@@ -73,4 +74,4 @@ def extract_file_to_dir(file_path: str, student_dir: str) -> None | Exception:
     elif file_path.lower().endswith('.7z'):
         extract_7z(file_path, student_dir)
     else:
-        print(f"\n[ERROR] unknown file type: {file_path}\n", flush=True)
+        print(f'\n[ERROR] unknown file type: {file_path}\n', flush=True)

View File

@@ -5,7 +5,7 @@ import hashlib
 import pandas as pd
 from functools import partial
-from utils.settings import CSV_DIR
+from utils.settings import CSV_DIR, BB_GRADEBOOKS_DIR, BB_SUBMISSIONS_DIR
 def load_excluded_filenames(submissions_dir_name: str) -> list[str]: # helper function for hashing all files
@@ -25,7 +25,6 @@ def load_excluded_filenames(submissions_dir_name: str) -> list[str]: # helper f
         print(f'[INFO] Error message: {e}', flush=True)
         return []
 def get_hashes_in_dir(dir_path: str, excluded_filenames: list = []) -> list: # helper function for hashing all files
     hash_list = []
     for subdir, dirs, files in os.walk(dir_path): # loop through all files in the directory and generate hashes
@@ -38,11 +37,10 @@ def get_hashes_in_dir(dir_path: str, excluded_filenames: list = []) -> list: #
             hash_list.append({ 'filepath': filepath, 'filename': filename, 'sha256 hash': filehash})
     return hash_list
 def generate_hashes_gradebook(gradebook_dir_path: str) -> str: # main function for hashing all files in gradebook
     gradebook_dir_name = os.path.abspath(gradebook_dir_path).split(os.path.sep)[-1] # get name of gradebook by separating path and use rightmost part
     if not os.path.isdir(gradebook_dir_path):
-        exit(f'Directory {gradebook_dir_path} does not exist.\nMake sure "{gradebook_dir_name}" exists in "BB_gradebooks".\n')
+        exit(f'Directory {gradebook_dir_path} does not exist.\nMake sure "{gradebook_dir_name}" exists in "{BB_GRADEBOOKS_DIR}".\n')
     dicts_with_hashes_list = get_hashes_in_dir(gradebook_dir_path)
     for hash_dict in dicts_with_hashes_list:
@@ -64,11 +62,10 @@ def generate_hashes_gradebook(gradebook_dir_path: str) -> str: # main function
     print(f'[INFO] Created CSV file with all files & hashes in gradebook: {gradebook_dir_name}\nCSV file: {csv_file_path}', flush=True)
     return csv_file_path
 def generate_hashes_submissions(submissions_dir_path: str) -> str: # main function for hashing all files in submissions
     submissions_dir_name = os.path.abspath(submissions_dir_path).split(os.path.sep)[-1] # get name of submission/assignment by separating path and use rightmost part
     if not os.path.isdir(submissions_dir_path):
-        exit(f'Directory {submissions_dir_path} does not exist.\nMake sure "{submissions_dir_name}" exists in "BB_submissions".\n')
+        exit(f'Directory {submissions_dir_path} does not exist.\nMake sure "{submissions_dir_name}" exists in "{BB_SUBMISSIONS_DIR}".\n')
     excluded_filenames = load_excluded_filenames(submissions_dir_name)
     dicts_with_hashes_list = []
@@ -96,8 +93,7 @@ def generate_hashes_submissions(submissions_dir_path: str) -> str: # main funct
         for student_dict in dicts_with_hashes_list:
             writer.writerows(student_dict)
     print(f'[INFO] Created CSV file with all files & hashes for submissions in: {submissions_dir_name}\nCSV file: {csv_file_path}', flush=True)
     return csv_file_path
 def generate_duplicate_hashes_generic(hashes_csv_file_path: str, drop_columns: list[str]):
     csv = pd.read_csv(hashes_csv_file_path)

View File

@@ -1,10 +1,12 @@
 import os, shutil, re
 from utils.extractor import extract_file_to_dir
-from utils.settings import BAD_DIR_NAME
+from utils.settings import BAD_DIR_NAME, BB_GRADEBOOKS_DIR
 def validate_gradebook_dir_name(src_dir: str) -> None:
     if not os.path.isdir(src_dir): # check if it exists and is a directory
-        print(f"\n[Error] Incorrect directory: {src_dir}\n[Info] Make sure the directory exists in 'BB_gradebooks'")
+        print(f'\n[Error] Incorrect directory: {src_dir}\n[Info] Make sure the directory exists in "{BB_GRADEBOOKS_DIR}"')
         exit()
     if not os.listdir(src_dir): # check if there are any files in the directory
         print(f'\n[Info] No files found in this gradebook - nothing to organise')
@@ -13,7 +15,6 @@ def validate_gradebook_dir_name(src_dir: str) -> None:
         print(f'\n[Info] Gradebook has only invalid compressed files in: {os.path.join(src_dir, BAD_DIR_NAME)}\n[Info] Nothing to organise')
         exit()
 def get_comment_from_submission_txt(file_path: str) -> str | None:
     no_comment_text = f'Comments:\nThere are no student comments for this assignment.'
     no_comment_text_regex = no_comment_text
@@ -30,7 +31,6 @@ def get_comment_from_submission_txt(file_path: str) -> str | None:
         return comment
     return None
 def get_gradebook_stats(src_dir: str) -> dict[str, int]:
     all_files = [ os.path.join(src_dir, f) for f in os.listdir(src_dir) if BAD_DIR_NAME not in f ]
     dirs = [ f for f in all_files if os.path.isdir(f) and BAD_DIR_NAME not in f ]
@@ -55,7 +55,6 @@ def get_gradebook_stats(src_dir: str) -> dict[str, int]:
     print(msg, flush=True)
     return files_counter
 def organise_file_per_student(src_dir: str, dest_dir: str, file_name: str, student_no: str) -> None:
     student_dir = os.path.join(dest_dir, student_no)
     os.makedirs(student_dir, exist_ok=True) # create student directory if it doesn't exist
@@ -79,7 +78,6 @@ def organise_file_per_student(src_dir: str, dest_dir: str, file_name: str, stude
     new_file_path = os.path.join(student_dir, os.path.basename(file_name))
     shutil.move(file_path, new_file_path) # move the file to student directory
 def organise_gradebook(src_dir: str, dest_dir: str) -> None:
     """1) extracts .zip, .rar, .7z files, organises contents into directories per student number, and deletes compressed files after successful extraction
     2) organises all other files in gradebook into directories per student number
@@ -107,7 +105,6 @@ def organise_gradebook(src_dir: str, dest_dir: str) -> None:
     print(f'[Info] Comments in file: {dest_dir}_comments.txt', flush=True)
     print(f'[Note] Compressed files (.zip, .rar, .7z) are automatically deleted from the gradebook directory after successful extraction', flush=True)
 def check_submissions_dir_for_compressed(submissions_dir: str) -> None:
     """checks if any submitted compressed files contain more compressed files inside (they are not recursively extracted)

View File

@@ -1,4 +1,7 @@
 import os
+BB_GRADEBOOKS_DIR = 'BB_gradebooks' # directory with extracted gradebooks downloaded from Blackboard
+BB_SUBMISSIONS_DIR = 'BB_submissions' # directory with organised gradebook submissions
 BAD_DIR_NAME = '__BAD__' # for organise_gradebook.py - directory with corrupt/invalid compressed files
 CSV_DIR = os.path.join(os.getcwd(), 'csv-inspect') # for inspect_gradebook.py and inspect_submissions.py - output dir for generated CSV files