Skip "empty" files (those whose size does not exceed MIN_FILESIZE_IN_BYTES) from inspection
This commit is contained in:
@@ -5,7 +5,7 @@ import hashlib
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from utils.settings import CSV_DIR, BB_GRADEBOOKS_DIR, BB_SUBMISSIONS_DIR
|
from utils.settings import CSV_DIR, BB_GRADEBOOKS_DIR, BB_SUBMISSIONS_DIR, MIN_FILESIZE_IN_BYTES
|
||||||
|
|
||||||
|
|
||||||
def load_excluded_filenames(submissions_dir_name: str) -> list[str]: # helper function for hashing all files
|
def load_excluded_filenames(submissions_dir_name: str) -> list[str]: # helper function for hashing all files
|
||||||
@@ -31,10 +31,13 @@ def get_hashes_in_dir(dir_path: str, excluded_filenames: list = []) -> list: #
|
|||||||
for filename in files:
|
for filename in files:
|
||||||
if filename.lower() not in excluded_filenames: # convert to lowercase for comparison with excluded files & do not hash if in the excluded list
|
if filename.lower() not in excluded_filenames: # convert to lowercase for comparison with excluded files & do not hash if in the excluded list
|
||||||
filepath = os.path.join(subdir, filename)
|
filepath = os.path.join(subdir, filename)
|
||||||
|
if os.path.getsize(filepath) > MIN_FILESIZE_IN_BYTES: # file size more than MIN_FILESIZE_IN_BYTES (as set in settings.py)
|
||||||
with open(filepath, 'rb') as f:
|
with open(filepath, 'rb') as f:
|
||||||
filehash = hashlib.sha256(f.read()).hexdigest()
|
filehash = hashlib.sha256(f.read()).hexdigest()
|
||||||
if filehash != 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855': # do not include hashes of empty files
|
#if filehash != 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855': # do not include hashes of empty files
|
||||||
hash_list.append({ 'filepath': filepath, 'filename': filename, 'sha256 hash': filehash})
|
hash_list.append({ 'filepath': filepath, 'filename': filename, 'sha256 hash': filehash})
|
||||||
|
# else:
|
||||||
|
# print(f'size: {os.path.getsize(filepath)}B, {filepath}')
|
||||||
return hash_list
|
return hash_list
|
||||||
|
|
||||||
def generate_hashes_gradebook(gradebook_dir_path: str) -> str: # main function for hashing all files in gradebook
|
def generate_hashes_gradebook(gradebook_dir_path: str) -> str: # main function for hashing all files in gradebook
|
||||||
|
|||||||
Reference in New Issue
Block a user