# database-2025/analysis/data_generator.py
#
# NOTE: this file contains Cyrillic (non-ASCII) characters in its string
# literals. They are intentional; some code viewers flag them as "ambiguous
# Unicode characters" that might be confused with look-alike characters.

#
# "contingent-movement" project;
# author: gazakbayev.net
# ver: 1.0
#
from faker import Faker
import random
from datetime import datetime, timedelta
from manager import *
# Initialise the project's Database object before any generator writes data.
# NOTE(review): `Database` is presumably provided by the star import from
# `manager`; what initialize() actually does is not visible here.
Database.initialize()
# Single shared Faker instance configured with the Russian (ru_RU) locale;
# every generator below draws its fake values from it.
fake = Faker('ru_RU')
class DataStorage:
    """In-memory registry of everything the generators have produced so far.

    All attributes are class-level lists shared by the whole module; the
    generators append to them so that later generators can reference
    entities created earlier.
    """

    # First field (passport number) of every generated physical person.
    physicals: list = []
    # First field (the chosen physical-person identifier) of every supervisor.
    supervisors: list = []
    # Full generated records (lists of field values), one per entity.
    faculties: list = []
    departments: list = []
    programs: list = []
    groups: list = []
    students: list = []
    disciplines: list = []
def generate_physicals(count=30):
    """Generate ``count`` physical-person records and persist each of them.

    For every record the passport number (its first field) is appended to
    ``DataStorage.physicals`` and the complete record is handed to
    ``physicals_write``.

    Args:
        count: Number of records to create.
    """
    access_levels = ['КАМПУС', 'ОБЩЕЖИТИЯ', 'ПОЛНЫЙ']
    for _ in range(count):
        level = random.choice(access_levels)
        # Six random bytes rendered as colon-separated upper-case hex pairs.
        card_bytes = (random.randint(0, 255) for _ in range(6))
        card = ":".join(f"{byte:02X}" for byte in card_bytes)
        passport = fake.unique.passport_number()
        record = [
            passport,
            fake.first_name(),
            fake.last_name(),
            fake.date_of_birth(minimum_age=18, maximum_age=80),
            fake.phone_number(),
            fake.email(),
            fake.country(),
            fake.address(),
            card,
            level,
        ]
        DataStorage.physicals.append(passport)
        physicals_write(record)
def generate_supervisors(count=5):
    """Generate ``count`` supervisor records based on existing physical persons.

    Ten physical persons are generated first when none exist yet. Each record
    consists of a randomly chosen physical-person identifier, a random integer
    in 1..30, a random float in 0.3..1.0 rounded to two decimals and a
    randomly chosen position title. The chosen identifier is appended to
    ``DataStorage.supervisors`` and the record is passed to
    ``supervisors_write``.

    Args:
        count: Number of records to create.
    """
    positions = [
        'МЛАДШИЙ НАУЧНЫЙ СОТРУДНИК',
        'СТАРШИЙ НАУЧНЫЙ СОТРУДНИК',
        'ВЕДУЩИЙ НАУЧНЫЙ СОТРУДНИК',
        'ГЛАВНЫЙ НАУЧНЫЙ СОТРУДНИК',
        'НАУЧНЫЙ СОТРУДНИК',
    ]
    if not DataStorage.physicals:
        generate_physicals(10)
    for _ in range(count):
        person = random.choice(DataStorage.physicals)
        whole_number = random.randint(1, 30)
        fraction = round(random.uniform(0.3, 1.0), 2)
        position = random.choice(positions)
        DataStorage.supervisors.append(person)
        supervisors_write([person, whole_number, fraction, position])
def generate_faculties(count=3):
    """Generate ``count`` faculty records and persist each of them.

    The first records use the predefined faculty names. When ``count``
    exceeds the number of predefined names, the remaining faculties get a
    generated name ("Факультет <unique word>") instead of raising
    ``IndexError`` — the same fallback the department generator uses.

    Ten physical persons are generated first when none exist yet. Each record
    holds: name, the upper-cased first three characters of the name, a head,
    an optional vice (never equal to the head, may be ``None``) and an
    address. The record is appended to ``DataStorage.faculties`` and passed
    to ``faculties_write``.

    Args:
        count: Number of records to create.
    """
    faculty_names = [
        "Факультет компьютерных наук",
        "Физико-математический факультет",
        "Факультет экономики",
        "Юридический факультет",
        "Филологический факультет",
    ]
    if not DataStorage.physicals:
        generate_physicals(10)
    for i in range(count):
        if i < len(faculty_names):
            name = faculty_names[i]
        else:
            # Predefined names are exhausted: fall back to a generated one.
            name = f"Факультет {fake.unique.word().capitalize()}"
        head = random.choice(DataStorage.physicals)
        # The vice must differ from the head; None means "no vice".
        vice = random.choice([p for p in DataStorage.physicals if p != head] + [None])
        data = [
            name,
            name[:3].upper(),
            head,
            vice,
            fake.address(),
        ]
        DataStorage.faculties.append(data)
        faculties_write(data)
def generate_departments(count=5):
    """Generate ``count`` department records and persist each of them.

    Faculties (default amount) and physical persons (ten) are generated
    first when the respective storage is empty. The first records take their
    name from the predefined list; once it is exhausted, names of the form
    "Кафедра <unique word>" are generated.

    Each record holds: name, the upper-cased first three characters of the
    name, a date between 30 and 5 years ago, a head, an optional vice
    (different from the head), an optional secretary (different from both)
    and a 1-based faculty number. The record is appended to
    ``DataStorage.departments`` and passed to ``departments_write``.

    Args:
        count: Number of records to create.
    """
    predefined = [
        "Кафедра программной инженерии",
        "Кафедра искусственного интеллекта",
        "Кафедра теоретической физики",
        "Кафедра прикладной математики",
        "Кафедра экономической теории",
        "Кафедра системного анализа",
        "Кафедра кибербезопасности",
        "Кафедра биоинформатики",
    ]
    if not DataStorage.faculties:
        generate_faculties()
    if not DataStorage.physicals:
        generate_physicals(10)

    for index in range(count):
        if index < len(predefined):
            title = predefined[index]
        else:
            title = f"Кафедра {fake.unique.word().capitalize()}"

        people = DataStorage.physicals
        chief = random.choice(people)
        # None in the candidate lists stands for "position not filled".
        deputy = random.choice([p for p in people if p != chief] + [None])
        clerk = random.choice([p for p in people if p not in (chief, deputy)] + [None])
        past_date = fake.date_between(start_date='-30y', end_date='-5y')
        # Faculties are referenced by their 1-based position.
        faculty_no = random.choice(range(1, len(DataStorage.faculties) + 1))

        record = [title, title[:3].upper(), past_date, chief, deputy, clerk, faculty_no]
        DataStorage.departments.append(record)
        departments_write(record)
def generate_programs(count=8):
    """Generate ``count`` study-program records and persist each of them.

    The first records take their name from the predefined list; once it is
    exhausted, a name made of two capitalised random words is used.

    Each record holds: a unique code "SPEC-??-####", a randomly chosen level
    (BACHELOR / MAGISTER / ASPIRANT), the name and an optional parent number
    (``None`` or a 1-based number of an earlier program). The record is
    appended to ``DataStorage.programs`` and passed to ``programs_write``.

    Args:
        count: Number of records to create.
    """
    predefined = [
        "Программная инженерия",
        "Искусственный интеллект и машинное обучение",
        "Теоретическая физика",
        "Прикладная математика и информатика",
        "Экономика и финансы",
        "Юриспруденция",
        "Филология и лингвистика",
        "Бизнес-информатика",
        "Биоинженерия",
        "Кибербезопасность",
        "Международные отношения",
        "Психология",
    ]
    levels = ['BACHELOR', 'MAGISTER', 'ASPIRANT']

    for index in range(count):
        if index < len(predefined):
            title = predefined[index]
            # Predefined programs may only point at programs created earlier
            # in this very call.
            highest_parent = index
        else:
            title = f"{fake.word().capitalize()} {fake.word().capitalize()}"
            # Generated programs may point at any program stored so far.
            highest_parent = len(DataStorage.programs)

        parent = random.choice([None] + list(range(1, highest_parent + 1)))
        code = f"SPEC-{fake.unique.bothify(text='??-####')}"
        record = [code, random.choice(levels), title, parent]
        DataStorage.programs.append(record)
        programs_write(record)
def generate_groups(count=5):
    """Generate ``count`` study-group records and persist each of them.

    Missing prerequisites are generated individually: only the storages that
    are actually empty (faculties, programs, departments) are filled, so
    already existing entities are never generated a second time.

    Each record holds: a group identifier of the form
    "<prefix><2 digits>-<3 digits><optional Cyrillic lower-case letter>",
    1-based faculty, program and department numbers, a start date within the
    last four years and an end date 1460 days after the start. The record is
    appended to ``DataStorage.groups`` and passed to ``groups_write``.

    Args:
        count: Number of records to create.
    """
    # Fill only what is missing; regenerating an already populated storage
    # would duplicate its entities.
    if not DataStorage.faculties:
        generate_faculties()
    if not DataStorage.programs:
        generate_programs()
    if not DataStorage.departments:
        generate_departments()

    group_prefixes = ['А', 'Б', 'М']
    # Code points 1072..1103 are the Cyrillic lower-case letters "а".."я".
    cyrillic_lower = [chr(c) for c in range(1072, 1104)]
    for _ in range(count):
        prefix = random.choice(group_prefixes)
        year = fake.numerify(text='##')
        num = fake.numerify(text='###')
        # The empty string means "no letter suffix".
        letter = random.choice([''] + cyrillic_lower)
        group_id = f"{prefix}{year}-{num}{letter}"
        study_starts = fake.date_between(start_date='-4y', end_date='today')
        study_ends = study_starts + timedelta(days=1460)
        data = [
            group_id,
            random.choice(range(1, len(DataStorage.faculties) + 1)),
            random.choice(range(1, len(DataStorage.programs) + 1)),
            random.choice(range(1, len(DataStorage.departments) + 1)),
            study_starts,
            study_ends,
        ]
        DataStorage.groups.append(data)
        groups_write(data)
def generate_students(count=10):
    """Generate ``count`` student records and persist each of them.

    Missing prerequisites are generated individually: 150 physical persons,
    the default amount of groups and the default amount of supervisors are
    created only when the respective storage is empty, so existing entities
    are never generated a second time.

    Each record holds: a physical-person identifier, a group identifier, an
    optional supervisor (may be ``None``), an education form and a status
    (weighted 85% / 10% / 5% in the order of ``statuses``). The record is
    appended to ``DataStorage.students`` and passed to ``students_write``.

    Args:
        count: Number of records to create.
    """
    # Fill only what is missing; regenerating an already populated storage
    # would add unwanted extra entities.
    if not DataStorage.physicals:
        generate_physicals(150)
    if not DataStorage.groups:
        generate_groups()
    if not DataStorage.supervisors:
        generate_supervisors()

    statuses = ['УЧИТСЯ', 'В АКАДЕМИЧЕСКОМ ОТПУСКЕ', 'ОТЧИСЛЕН']
    education_forms = ['ОЧНАЯ', 'ЗАОЧНАЯ', 'ВЕЧЕРНЯЯ']
    for _ in range(count):
        data = [
            random.choice(DataStorage.physicals),
            random.choice([g[0] for g in DataStorage.groups]),
            # None means "no supervisor assigned".
            random.choice(DataStorage.supervisors + [None]),
            random.choice(education_forms),
            random.choices(statuses, weights=[0.85, 0.1, 0.05])[0],
        ]
        DataStorage.students.append(data)
        students_write(data)
def generate_family(count=20):
    """Generate ``count`` family-member records for random students.

    Students (default amount) are generated first when none exist yet. Each
    record holds: the first field of a randomly chosen student record, a
    first name, a last name, a kinship kind, a phone number and an address.
    Records are passed to ``family_write``; nothing is kept in
    ``DataStorage``.

    Args:
        count: Number of records to create.
    """
    if not DataStorage.students:
        generate_students()

    relation_kinds = ['MOTHER', 'FATHER', 'BROTHER', 'SISTER', 'ANOTHER']
    for _ in range(count):
        owner = random.choice(DataStorage.students)[0]
        relation = random.choice(relation_kinds)
        family_write([
            owner,
            fake.first_name(),
            fake.last_name(),
            relation,
            fake.phone_number(),
            fake.address(),
        ])
def generate_disciplines(count=7):
    """Generate ``count`` discipline records and persist each of them.

    Departments (default amount) are generated first when none exist yet.
    Each record holds: a predefined discipline name followed by a three-digit
    number, a 1-based department number, a random integer in 2..6, the
    academic hours (36..72), the general hours (academic hours plus 1..36)
    and a random boolean. The record is appended to
    ``DataStorage.disciplines`` and passed to ``disciplines_write``.

    Args:
        count: Number of records to create.
    """
    if not DataStorage.departments:
        generate_departments()

    base_titles = [
        "Программирование на Python",
        "Базы данных",
        "Машинное обучение",
        "Теоретическая механика",
        "Дифференциальные уравнения",
        "Эконометрика",
        "Гражданское право",
        "История литературы",
    ]
    for _ in range(count):
        academic = random.randint(36, 72)
        # General hours always exceed the academic hours by at least one.
        general = academic + random.randint(1, 36)
        title = f"{random.choice(base_titles)} {fake.numerify(text='###')}"
        department_no = random.choice(range(1, len(DataStorage.departments) + 1))
        small_number = random.randint(2, 6)
        flag = random.choice([True, False])

        record = [title, department_no, small_number, academic, general, flag]
        DataStorage.disciplines.append(record)
        disciplines_write(record)
def generate_statements(count=300):
    """Generate ``count`` statement records and persist each of them.

    Missing prerequisites are generated individually: students, disciplines
    and physical persons are created (with their default amounts) only when
    the respective storage is empty, so existing entities are never
    generated a second time.

    Each record holds: a 1-based student number, a 1-based discipline number,
    a physical-person identifier, a random integer in 0..2, a random integer
    in 3..10 and a date within the last two years. Records are passed to
    ``statements_write``; nothing is kept in ``DataStorage``.

    Args:
        count: Number of records to create.
    """
    # Fill only what is missing; regenerating an already populated storage
    # would add unwanted extra entities.
    if not DataStorage.students:
        generate_students()
    if not DataStorage.disciplines:
        generate_disciplines()
    if not DataStorage.physicals:
        generate_physicals()

    for _ in range(count):
        data = [
            random.choice(range(1, len(DataStorage.students) + 1)),
            random.choice(range(1, len(DataStorage.disciplines) + 1)),
            random.choice(DataStorage.physicals),
            random.randint(0, 2),
            random.randint(3, 10),
            fake.date_between(start_date='-2y', end_date='today'),
        ]
        statements_write(data)
def generate_movement(count=50):
    """Generate ``count`` student-movement records and persist each of them.

    Fixes compared with the previous implementation:

    * A transfer now targets a group different from the student's current
      group. Previously the comparison value was re-drawn for every
      candidate, so the filter was random and could leave no candidates at
      all (always the case with a single group), raising ``IndexError``.
    * When no other group exists, the transfer movement type is not offered
      for that student.
    * The new status always follows from the movement type instead of being
      random for enrolment / reinstatement / expulsion.
    * Groups and students are generated only when their storage is empty.

    Each record holds: a 1-based student number, the movement type, the new
    group identifier (only for transfers, otherwise ``None``), the new status
    and a date within the last two years. Records are passed to
    ``movement_write``; nothing is kept in ``DataStorage``.

    Args:
        count: Number of records to create.
    """
    if not DataStorage.groups:
        generate_groups()
    if not DataStorage.students:
        generate_students()

    transfer = 'ПЕРЕВОД В ДРУГУЮ ГРУППУ'
    # Every movement type determines the status the student ends up with.
    status_by_type = {
        'ЗАЧИСЛЕН': 'УЧИТСЯ',
        'ВОССТАНОВЛЕН': 'УЧИТСЯ',
        'ОТЧИСЛЕН': 'ОТЧИСЛЕН',
        'В АКАДЕМИЧЕСКИЙ ОТПУСК': 'В АКАДЕМИЧЕСКОМ ОТПУСКЕ',
        transfer: 'УЧИТСЯ',
    }
    movement_types = list(status_by_type)
    non_transfer_types = [t for t in movement_types if t != transfer]
    group_ids = [g[0] for g in DataStorage.groups]

    for _ in range(count):
        student_no = random.randint(1, len(DataStorage.students))
        # NOTE(review): assumes student numbers are 1-based positions in
        # DataStorage.students (the same assumption the number itself relies
        # on); the second field of a student record is its group identifier.
        current_group = DataStorage.students[student_no - 1][1]
        other_groups = [gid for gid in group_ids if gid != current_group]

        # A transfer is only possible when another group exists.
        allowed_types = movement_types if other_groups else non_transfer_types
        movement_type = random.choice(allowed_types)
        new_group = random.choice(other_groups) if movement_type == transfer else None

        data = [
            student_no,
            movement_type,
            new_group,
            status_by_type[movement_type],
            fake.date_between(start_date='-2y', end_date='today'),
        ]
        movement_write(data)
def generate_files(count=100):
    """Generate ``count`` file records attached to random students.

    Students (default amount) are generated first when none exist yet. Each
    record holds: a 1-based student number, a file name, a sentence, an
    extension (PNG / JPEG / PDF), a random float in 0.1..20.0 rounded to two
    decimals and a path "/uploads/<unique uuid4>". Records are passed to
    ``files_write``; nothing is kept in ``DataStorage``.

    Args:
        count: Number of records to create.
    """
    if not DataStorage.students:
        generate_students()

    allowed_extensions = ['PNG', 'JPEG', 'PDF']
    for _ in range(count):
        student_no = random.choice(range(1, len(DataStorage.students) + 1))
        name = fake.file_name()
        description = fake.sentence()
        extension = random.choice(allowed_extensions)
        amount = round(random.uniform(0.1, 20.0), 2)
        location = f"/uploads/{fake.unique.uuid4()}"
        files_write([student_no, name, description, extension, amount, location])
import debug_data.limits as lim
def generate_all_data():
    """Run every generator once, in dependency order.

    The amount for each generator is read from the attribute of the ``lim``
    module named next to it; each attribute is looked up right before its
    generator is called.
    """
    steps = (
        (generate_physicals, "PHYSICALS"),
        (generate_supervisors, "SUPERVISORS"),
        (generate_faculties, "FACULTIES"),
        (generate_departments, "DEPARTMENTS"),
        (generate_programs, "PROGRAMS"),
        (generate_groups, "GROUPS"),
        (generate_students, "STUDENTS"),
        (generate_family, "FAMILY"),
        (generate_disciplines, "DISCIPLINES"),
        (generate_statements, "STATEMENTS"),
        (generate_movement, "MOVEMENTS"),
        (generate_files, "FILES"),
    )
    for generator, limit_name in steps:
        generator(getattr(lim, limit_name))
# Script entry point: generate the complete data set, then report completion.
if __name__ == "__main__":
    generate_all_data()
    # The message is Russian for "Test data generation completed!".
    print("Генерация тестовых данных завершена!")
# /$$$$$$ /$$ /$$ /$$
# /$$__ $$ | $$ |__/ | $$
# | $$ \__/ /$$$$$$ /$$$$$$$ /$$$$$$ /$$ /$$$$$$$ /$$$$$$ /$$$$$$ /$$$$$$$ /$$$$$$
# | $$ /$$__ $$| $$__ $$|_ $$_/ | $$| $$__ $$ /$$__ $$ /$$__ $$| $$__ $$|_ $$_/
# | $$ | $$ \ $$| $$ \ $$ | $$ | $$| $$ \ $$| $$ \ $$| $$$$$$$$| $$ \ $$ | $$
# | $$ $$| $$ | $$| $$ | $$ | $$ /$$| $$| $$ | $$| $$ | $$| $$_____/| $$ | $$ | $$ /$$
# | $$$$$$/| $$$$$$/| $$ | $$ | $$$$/| $$| $$ | $$| $$$$$$$| $$$$$$$| $$ | $$ | $$$$/
# \______/ \______/ |__/ |__/ \___/ |__/|__/ |__/ \____ $$ \_______/|__/ |__/ \___/
# /$$ \ $$
# | $$$$$$/
# \______/
# /$$ /$$ /$$
# | $$$ /$$$ | $$
# | $$$$ /$$$$ /$$$$$$ /$$ /$$ /$$$$$$ /$$$$$$/$$$$ /$$$$$$ /$$$$$$$ /$$$$$$
# | $$ $$/$$ $$ /$$__ $$| $$ /$$//$$__ $$| $$_ $$_ $$ /$$__ $$| $$__ $$|_ $$_/
# | $$ $$$| $$| $$ \ $$ \ $$/$$/| $$$$$$$$| $$ \ $$ \ $$| $$$$$$$$| $$ \ $$ | $$
# | $$\ $ | $$| $$ | $$ \ $$$/ | $$_____/| $$ | $$ | $$| $$_____/| $$ | $$ | $$ /$$
# | $$ \/ | $$| $$$$$$/ \ $/ | $$$$$$$| $$ | $$ | $$| $$$$$$$| $$ | $$ | $$$$/
# |__/ |__/ \______/ \_/ \_______/|__/ |__/ |__/ \_______/|__/ |__/ \___/
# /$$ /$$ /$$ /$$
# | $$ | $$ | $$ | $$
# | $$$$$$$ /$$ /$$ /$$$$$$ /$$$$$$ /$$$$$$$$ /$$$$$$ | $$ /$$| $$$$$$$ /$$$$$$ /$$ /$$ /$$$$$$ /$$ /$$ /$$$$$$$ /$$$$$$ /$$$$$$
# | $$__ $$| $$ | $$ /$$__ $$ |____ $$|____ /$$/ |____ $$| $$ /$$/| $$__ $$ |____ $$| $$ | $$ /$$__ $$| $$ /$$/| $$__ $$ /$$__ $$|_ $$_/
# | $$ \ $$| $$ | $$ | $$ \ $$ /$$$$$$$ /$$$$/ /$$$$$$$| $$$$$$/ | $$ \ $$ /$$$$$$$| $$ | $$| $$$$$$$$ \ $$/$$/ | $$ \ $$| $$$$$$$$ | $$
# | $$ | $$| $$ | $$ | $$ | $$ /$$__ $$ /$$__/ /$$__ $$| $$_ $$ | $$ | $$ /$$__ $$| $$ | $$| $$_____/ \ $$$/ | $$ | $$| $$_____/ | $$ /$$
# | $$$$$$$/| $$$$$$$ | $$$$$$$| $$$$$$$ /$$$$$$$$| $$$$$$$| $$ \ $$| $$$$$$$/| $$$$$$$| $$$$$$$| $$$$$$$ \ $//$$| $$ | $$| $$$$$$$ | $$$$/
# |_______/ \____ $$ \____ $$ \_______/|________/ \_______/|__/ \__/|_______/ \_______/ \____ $$ \_______/ \_/|__/|__/ |__/ \_______/ \___/
# /$$ | $$ /$$ \ $$ /$$ | $$
# | $$$$$$/ | $$$$$$/ | $$$$$$/
# \______/ \______/ \______/