Files
VKR_2026/main.py
podiukov.iv 23fe47644f add dataset
2026-04-28 06:06:43 +05:00

30 lines
1.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Точка входа программы
import csv
from modules.NER import NER
from modules.paraGenerator import ParaphraseGenerator
from modules.validator import validator
# Pipeline components, constructed once at module level and reused by every
# main() call: `ner` extracts entities, `pg` generates the paraphrase.
ner = NER()
pg = ParaphraseGenerator()
# Dataset location, resolved relative to the current working directory.
datasetPath = 'rupaws/rupaws_wiki_test.csv'

# Load the source texts: the second column of the ';'-separated dataset.
# - encoding is explicit so decoding does not depend on the platform locale
#   (assumes the CSV is stored as UTF-8 -- TODO confirm against the dataset);
# - newline='' is what the csv module requires so that newlines embedded in
#   quoted fields are parsed correctly.
# NOTE(review): if the file starts with a header row, srcTexts[0] will be the
# column title rather than a real text -- verify against the dataset.
with open(datasetPath, 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f, delimiter=';')
    # csv.reader yields [] for blank lines; rows without a second field are
    # skipped instead of crashing with IndexError.
    srcTexts = [row[1] for row in reader if len(row) > 1]
def main(srcText):
    """Run the paraphrase pipeline for one text and return the validator result.

    Steps, in order:
      1. extract entities from ``srcText`` with ``ner`` and print them;
      2. ask ``pg`` for a paraphrase of ``srcText`` given those entities;
      3. extract entities from the paraphrase;
      4. hand source text, paraphrase and both entity sets to ``validator``.

    Args:
        srcText: the text to paraphrase.

    Returns:
        Whatever ``validator`` returns for the four values above
        (presumably the accepted paraphrase, since the caller prints it as
        the paraphrased text -- confirm in modules.validator).
    """
    source_entities = ner.extract_entities(srcText)
    print(f'ИСХОДНЫЕ СУЩНОСТИ ~> {source_entities}')

    candidate = pg.generate(srcText, source_entities)
    return validator(
        srcText,
        candidate,
        source_entities,
        ner.extract_entities(candidate),  # entities found in the paraphrase
    )
# Smoke run: push the first 10 source texts through the pipeline and print
# each source text followed by the result returned by main().
# Slicing (instead of indexing over range(10)) stops cleanly when fewer than
# 10 texts were loaded, rather than raising IndexError.
for text in srcTexts[:10]:
    print(f'ИСХОДНЫЙ ТЕКСТ ~> {text}')
    print(f'ПАРАФРАЗИРОВАННЫЙ ТЕКСТ ~> {main(text)}\n')