Compare commits
2 Commits
8a4dee8e73
...
0385e13da7
| Author | SHA1 | Date | |
|---|---|---|---|
| 0385e13da7 | |||
| 7577148f83 |
@@ -2,7 +2,7 @@ import os, shutil, platform
|
|||||||
import zipfile, rarfile
|
import zipfile, rarfile
|
||||||
from py7zr import SevenZipFile, exceptions
|
from py7zr import SevenZipFile, exceptions
|
||||||
|
|
||||||
from utils.settings import BAD_DIR_NAME
|
from utils.settings import BAD_DIR_NAME, IGNORE_DIRS
|
||||||
|
|
||||||
|
|
||||||
def mark_file_as_BAD(file: str, bad_exception: Exception) -> None:
|
def mark_file_as_BAD(file: str, bad_exception: Exception) -> None:
|
||||||
@@ -19,8 +19,8 @@ def mark_file_as_BAD(file: str, bad_exception: Exception) -> None:
|
|||||||
def extract_zip(zip_file: str, target_dir: str) -> None | Exception:
|
def extract_zip(zip_file: str, target_dir: str) -> None | Exception:
|
||||||
try:
|
try:
|
||||||
with zipfile.ZipFile(zip_file, 'r') as zip_ref:
|
with zipfile.ZipFile(zip_file, 'r') as zip_ref:
|
||||||
members = [ m for m in zip_ref.infolist() if "__MACOSX" not in m.filename ]
|
members = [ m for m in zip_ref.infolist() if not any(dir_name in m.filename for dir_name in IGNORE_DIRS) ] # filter out files/dirs using IGNORE_DIRS
|
||||||
zip_ref.extractall(target_dir, members=members) # extract all files, ignoring those with the "__MACOSX" string in the name
|
zip_ref.extractall(target_dir, members=members) # extract remaining files
|
||||||
zip_ref.close()
|
zip_ref.close()
|
||||||
except zipfile.BadZipfile as e:
|
except zipfile.BadZipfile as e:
|
||||||
mark_file_as_BAD(zip_file, e)
|
mark_file_as_BAD(zip_file, e)
|
||||||
@@ -36,7 +36,7 @@ def extract_rar(rar_file: str, target_dir: str) -> None:
|
|||||||
else: # if Linux or Mac
|
else: # if Linux or Mac
|
||||||
rarfile.UNRAR_TOOL = 'unrar'
|
rarfile.UNRAR_TOOL = 'unrar'
|
||||||
files = rar_ref.namelist()
|
files = rar_ref.namelist()
|
||||||
files = [ f for f in files if "__MACOSX" not in f ] # filter out files with "__MACOSX" in the name
|
files = [ f for f in files if not any(dir_name in f for dir_name in IGNORE_DIRS) ] # filter out files/dirs using IGNORE_DIRS
|
||||||
rar_ref.extractall(target_dir, files) # extract the remaining files
|
rar_ref.extractall(target_dir, files) # extract the remaining files
|
||||||
rar_ref.close()
|
rar_ref.close()
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
@@ -56,7 +56,7 @@ def extract_7z(seven_zip_file: str, target_dir: str) -> None:
|
|||||||
if not seven_zip.getnames():
|
if not seven_zip.getnames():
|
||||||
raise exceptions.Bad7zFile
|
raise exceptions.Bad7zFile
|
||||||
files = seven_zip.getnames()
|
files = seven_zip.getnames()
|
||||||
files = [ f for f in files if "__MACOSX" not in f ] # filter out files with "__MACOSX" in the name
|
files = [ f for f in files if not any(dir_name in f for dir_name in IGNORE_DIRS) ] # filter out files/dirs using IGNORE_DIRS
|
||||||
seven_zip.extract(target_dir, targets=files) # extract the remaining files
|
seven_zip.extract(target_dir, targets=files) # extract the remaining files
|
||||||
seven_zip.close()
|
seven_zip.close()
|
||||||
except exceptions.Bad7zFile as e:
|
except exceptions.Bad7zFile as e:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import os, shutil, re
|
import os, shutil, re
|
||||||
|
|
||||||
from utils.extractor import extract_file_to_dir
|
from utils.extractor import extract_file_to_dir
|
||||||
from utils.settings import BAD_DIR_NAME, BB_GRADEBOOKS_DIR
|
from utils.settings import BAD_DIR_NAME, BB_GRADEBOOKS_DIR, IGNORE_DIRS
|
||||||
|
|
||||||
|
|
||||||
def validate_gradebook_dir_name(src_dir: str) -> None:
|
def validate_gradebook_dir_name(src_dir: str) -> None:
|
||||||
@@ -15,21 +15,24 @@ def validate_gradebook_dir_name(src_dir: str) -> None:
|
|||||||
print(f'\n[Info] Gradebook has only invalid compressed files in: {os.path.join(src_dir, BAD_DIR_NAME)}\n[Info] Nothing to organise')
|
print(f'\n[Info] Gradebook has only invalid compressed files in: {os.path.join(src_dir, BAD_DIR_NAME)}\n[Info] Nothing to organise')
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
def get_comment_from_submission_txt(file_path: str) -> str | None:
|
def get_comment_from_submission_txt(file_path: str) -> tuple[str, str] | None:
|
||||||
no_comment_text = f'Comments:\nThere are no student comments for this assignment.'
|
no_comment_regex = f'Comments:\nThere are no student comments for this assignment.'
|
||||||
no_comment_text_regex = no_comment_text
|
no_comment_pattern = re.compile(no_comment_regex)
|
||||||
no_comment_regex_compile = re.compile(no_comment_text_regex)
|
|
||||||
|
|
||||||
with open(file_path) as f:
|
with open(file_path) as f:
|
||||||
file_contents = f.read()
|
file_contents = f.read()
|
||||||
if not no_comment_regex_compile.findall(file_contents):
|
if not no_comment_pattern.findall(file_contents):
|
||||||
regular_expression = f'Comments:\n.*'
|
comment_regex = f'Comments:\n.*'
|
||||||
regex_compile = re.compile(regular_expression)
|
name_regex = f'^Name:\s*.*'
|
||||||
if regex_compile.findall(file_contents):
|
comment_pattern = re.compile(comment_regex)
|
||||||
match = regex_compile.findall(file_contents)[0]
|
name_pattern = re.compile(name_regex)
|
||||||
comment = match.split('\n')[1]
|
if comment_pattern.findall(file_contents):
|
||||||
return comment
|
comment_match = comment_pattern.findall(file_contents)[0]
|
||||||
return None
|
comment = comment_match.split('\n')[1]
|
||||||
|
name_match = name_pattern.findall(file_contents)[0]
|
||||||
|
name = name_match.split('Name:')[1].split('(')[0].strip() or ''
|
||||||
|
return comment, name
|
||||||
|
return None, None
|
||||||
|
|
||||||
def get_gradebook_stats(src_dir: str) -> dict[str, int]:
|
def get_gradebook_stats(src_dir: str) -> dict[str, int]:
|
||||||
all_files = [ os.path.join(src_dir, f) for f in os.listdir(src_dir) if BAD_DIR_NAME not in f ]
|
all_files = [ os.path.join(src_dir, f) for f in os.listdir(src_dir) if BAD_DIR_NAME not in f ]
|
||||||
@@ -68,11 +71,11 @@ def organise_file_per_student(src_dir: str, dest_dir: str, file_name: str, stude
|
|||||||
os.remove(file_path) # delete compressed file after successful extraction
|
os.remove(file_path) # delete compressed file after successful extraction
|
||||||
else:
|
else:
|
||||||
if file_path_lowercase.endswith('.txt'):
|
if file_path_lowercase.endswith('.txt'):
|
||||||
comment = get_comment_from_submission_txt(file_path) # get student comment (if any) from submission txt file
|
comment, name = get_comment_from_submission_txt(file_path) # get student comment (if any), and name, from submission txt file
|
||||||
if comment:
|
if comment and name:
|
||||||
comments_filename = f'{dest_dir}_comments.txt'
|
comments_filename = f'{dest_dir}_comments.txt'
|
||||||
with open(comments_filename, 'a') as f:
|
with open(comments_filename, 'a') as f:
|
||||||
f.write(f'\nStudent number: {student_no} - File: {file_path}\nComment: {comment}\n')
|
f.write(f'\nStudent number: {student_no} - Student name: {name}\nFile: {file_path}\nComment: {comment}\n')
|
||||||
else:
|
else:
|
||||||
file_name = file_name.split('_attempt_')[1].split('_', 1)[1] # rename any remaining files before moving - remove the BB generated info added to the original file name
|
file_name = file_name.split('_attempt_')[1].split('_', 1)[1] # rename any remaining files before moving - remove the BB generated info added to the original file name
|
||||||
new_file_path = os.path.join(student_dir, os.path.basename(file_name))
|
new_file_path = os.path.join(student_dir, os.path.basename(file_name))
|
||||||
@@ -88,7 +91,7 @@ def organise_gradebook(src_dir: str, dest_dir: str) -> None:
|
|||||||
print('\nGetting gradebook stats...', flush=True)
|
print('\nGetting gradebook stats...', flush=True)
|
||||||
files_counter = get_gradebook_stats(src_dir) # print stats about the files in gradebook and get files_counter dict to use later
|
files_counter = get_gradebook_stats(src_dir) # print stats about the files in gradebook and get files_counter dict to use later
|
||||||
students_numbers: list[str] = [] # list to add and count unique student numbers from all files in gradebook
|
students_numbers: list[str] = [] # list to add and count unique student numbers from all files in gradebook
|
||||||
print('\nStart organising... (this may take a while depending on the number of submissions)\n', flush=True)
|
print('\nStart organising... (this may take a while depending on the number -and size- of submissions)\n', flush=True)
|
||||||
|
|
||||||
for file_name in os.listdir(src_dir): # iterate through all files in the directory
|
for file_name in os.listdir(src_dir): # iterate through all files in the directory
|
||||||
if BAD_DIR_NAME not in file_name: # ignore dir BAD_DIR_NAME (created after first run if corrupt compressed files found)
|
if BAD_DIR_NAME not in file_name: # ignore dir BAD_DIR_NAME (created after first run if corrupt compressed files found)
|
||||||
@@ -96,15 +99,17 @@ def organise_gradebook(src_dir: str, dest_dir: str) -> None:
|
|||||||
students_numbers.append(student_no)
|
students_numbers.append(student_no)
|
||||||
organise_file_per_student(src_dir, dest_dir, file_name, student_no)
|
organise_file_per_student(src_dir, dest_dir, file_name, student_no)
|
||||||
|
|
||||||
|
ignored_str = ', '.join(IGNORE_DIRS)
|
||||||
|
print(f'[Info] Skipped extracting files in dirs with name that includes any of the following strings: {ignored_str}\n', flush=True)
|
||||||
abs_path = os.getcwd() # absolute path of main script
|
abs_path = os.getcwd() # absolute path of main script
|
||||||
print(f'[Info] Submissions organised into directory: {os.path.join(abs_path, dest_dir)}', flush=True)
|
print(f'[Info] Submissions organised into directory: {os.path.join(abs_path, dest_dir)}\n', flush=True)
|
||||||
print(f'[Info] Unique student numbers in gradebook files: {len(set(students_numbers))}', flush=True)
|
print(f'[Info] Unique student numbers in gradebook files: {len(set(students_numbers))}\n', flush=True)
|
||||||
if files_counter['.txt'] == 0:
|
if files_counter['.txt'] == 0:
|
||||||
print(f'[Info] No submission text files found, file with comments not created', flush=True)
|
print(f'[Info] No submission text files found, file with comments not created\n', flush=True)
|
||||||
else:
|
else:
|
||||||
print(f'[Info] Comments in file: {dest_dir}_comments.txt', flush=True)
|
print(f'[Info] Comments in file: {dest_dir}_comments.txt\n', flush=True)
|
||||||
|
|
||||||
print(f'[Note] Compressed files (.zip, .rar, .7z) are automatically deleted from the gradebook directory after successful extraction', flush=True)
|
print(f'[Note] Compressed files (.zip, .rar, .7z) are automatically deleted from the gradebook directory after successful extraction\n', flush=True)
|
||||||
|
|
||||||
def check_submissions_dir_for_compressed(submissions_dir: str) -> None:
|
def check_submissions_dir_for_compressed(submissions_dir: str) -> None:
|
||||||
"""checks if any submitted compressed files contain more compressed files inside (they are not recursively extracted)
|
"""checks if any submitted compressed files contain more compressed files inside (they are not recursively extracted)
|
||||||
|
|||||||
@@ -5,3 +5,4 @@ BB_GRADEBOOKS_DIR = 'BB_gradebooks' # directory with extracted gradebooks downl
|
|||||||
BB_SUBMISSIONS_DIR = 'BB_submissions' # directory with organised gradebook submissions
|
BB_SUBMISSIONS_DIR = 'BB_submissions' # directory with organised gradebook submissions
|
||||||
BAD_DIR_NAME = '__BAD__' # for organise_gradebook.py - directory with corrupt/invalid compressed files
|
BAD_DIR_NAME = '__BAD__' # for organise_gradebook.py - directory with corrupt/invalid compressed files
|
||||||
CSV_DIR = os.path.join(os.getcwd(), 'csv-inspect') # for inspect_gradebook.py and inspect_submissions.py - output dir for generated CSV files
|
CSV_DIR = os.path.join(os.getcwd(), 'csv-inspect') # for inspect_gradebook.py and inspect_submissions.py - output dir for generated CSV files
|
||||||
|
IGNORE_DIRS = [ '__MACOSX', 'vendor', 'node_modules' ] # list of dir names to ignore from extracting
|
||||||
|
|||||||
Reference in New Issue
Block a user