mirror of
https://github.com/EDeev/api_processor.git
synced 2026-06-15 19:11:01 +03:00
v. 0.1
This commit is contained in:
commit
0810f20173
24 changed files with 651 additions and 0 deletions
2
api_app/__init__.py
Normal file
2
api_app/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
# api_project/api_app/__init__.py
|
||||||
|
# Пустой файл инициализации для Python-пакета
|
||||||
2
api_app/grpc_client/__init__.py
Normal file
2
api_app/grpc_client/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
# api_project/api_app/grpc_client/__init__.py
|
||||||
|
# Пустой файл инициализации для Python-пакета
|
||||||
90
api_app/grpc_client/client.py
Normal file
90
api_app/grpc_client/client.py
Normal file
|
|
@ -0,0 +1,90 @@
|
||||||
|
# api_project/api_app/grpc_client/client.py
|
||||||
|
import grpc
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Добавляем путь для импорта сгенерированных протофайлов
|
||||||
|
current_dir = os.path.dirname(os.path.abspath(__file__))

# This module lives in <project root>/api_app/grpc_client/, so the project
# root (the directory holding manage.py and proto/) is two levels up.
project_root = os.path.dirname(os.path.dirname(current_dir))
project_proto_dir = os.path.join(project_root, 'proto')

# Legacy location, one level above the project root. It is what this module
# used to compute and is kept so an existing external proto/ directory still
# works; the in-project directory is searched first.
proto_dir = os.path.join(os.path.dirname(project_root), 'proto')

for _candidate in (project_proto_dir, proto_dir):
    if _candidate not in sys.path:
        sys.path.append(_candidate)

# Try the generated protobuf/gRPC modules first and fall back to in-process
# stand-ins when they cannot be loaded.
try:
    import text_service_pb2
    import text_service_pb2_grpc
    print("Успешно импортированы сгенерированные proto файлы")
except (ImportError, RuntimeError):
    # ImportError: the generated modules (or protobuf/grpc pieces they import)
    # are missing. RuntimeError: text_service_pb2_grpc raises it when the
    # installed grpcio is older than the version the code was generated with.
    print("Не удалось импортировать сгенерированные proto файлы, используем заглушки")

    class TextRequest:
        """Stand-in for the generated TextRequest message."""

        def __init__(self, text):
            self.text = text

    class TextResponse:
        """Stand-in for the generated TextResponse message."""

        def __init__(self, processed_text, success, error):
            self.processed_text = processed_text
            self.success = success
            self.error = error

    class TextProcessorStub:
        """Stand-in client stub that answers locally instead of calling a server."""

        def __init__(self, channel):
            # Stored only for interface parity; the stub never uses it.
            self.channel = channel

        def ProcessText(self, request):
            # Emulate a successful server response that echoes the input text.
            return TextResponse(
                processed_text=f"ЗАГЛУШКА ОБРАБОТКИ ТЕКСТА: {request.text}",
                success=True,
                error=""
            )

    # Namespaces that mimic the generated modules, so the rest of the file can
    # use text_service_pb2.* / text_service_pb2_grpc.* in either mode.
    class text_service_pb2:
        TextRequest = TextRequest
        TextResponse = TextResponse

    class text_service_pb2_grpc:
        TextProcessorStub = TextProcessorStub
|
||||||
|
|
||||||
|
def send_to_grpc_server(text: str, target: str = 'localhost:50051') -> dict:
    """
    Send text to the gRPC text-processing server and return the outcome.

    Any failure (connection, serialization, missing modules) is reported in
    the returned dict instead of being raised.

    Args:
        text: Text to process.
        target: ``host:port`` of the gRPC server.

    Returns:
        dict: ``processed_text``, ``success`` and ``error`` keys. ``error`` is
        ``None`` when the response carries no error text; on failure
        ``processed_text`` is ``None``, ``success`` is ``False`` and ``error``
        holds the exception message.
    """
    channel = None
    try:
        # The channel is not needed by the stub fallback, but is created in
        # both modes so the call path stays the same.
        try:
            channel = grpc.insecure_channel(target)
        except NameError:
            # grpc is not available in this module: use a placeholder channel.
            channel = "dummy_channel"

        stub = text_service_pb2_grpc.TextProcessorStub(channel)
        request = text_service_pb2.TextRequest(text=text)
        response = stub.ProcessText(request)

        return {
            'processed_text': response.processed_text,
            'success': response.success,
            # Normalize a missing or empty error field to None.
            'error': getattr(response, 'error', None) or None
        }
    except Exception as e:
        return {
            'processed_text': None,
            'success': False,
            'error': str(e)
        }
    finally:
        # Release the connection; the placeholder string has no close().
        close = getattr(channel, 'close', None)
        if callable(close):
            close()
|
||||||
33
api_app/migrations/0001_initial.py
Normal file
33
api_app/migrations/0001_initial.py
Normal file
|
|
@ -0,0 +1,33 @@
|
||||||
|
# Generated by Django 5.1.3 on 2025-05-06 12:35
|
||||||
|
|
||||||
|
import api_app.models
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Initial migration for api_app: creates the AudioFile and DocumentFile models."""

    # Marks this as the app's initial migration.
    initial = True

    # No prerequisite migrations.
    dependencies = [
    ]

    operations = [
        # Both models have the same four fields; uploads are named by
        # api_app.models.get_file_path.
        migrations.CreateModel(
            name='AudioFile',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('file', models.FileField(upload_to=api_app.models.get_file_path)),
                ('uploaded_at', models.DateTimeField(auto_now_add=True)),
                ('processed_text', models.TextField(blank=True, null=True)),
            ],
        ),
        migrations.CreateModel(
            name='DocumentFile',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('file', models.FileField(upload_to=api_app.models.get_file_path)),
                ('uploaded_at', models.DateTimeField(auto_now_add=True)),
                ('processed_text', models.TextField(blank=True, null=True)),
            ],
        ),
    ]
|
||||||
0
api_app/migrations/__init__.py
Normal file
0
api_app/migrations/__init__.py
Normal file
25
api_app/models.py
Normal file
25
api_app/models.py
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
# api_project/api_app/models.py
|
||||||
|
from django.db import models
|
||||||
|
import uuid
|
||||||
|
import os
|
||||||
|
|
||||||
|
def get_file_path(instance, filename):
    """Build the storage path for an uploaded file.

    The original name is replaced by a random UUID so uploads cannot collide
    or leak user-chosen names; only the extension is kept.

    Args:
        instance: Model instance the file belongs to (unused).
        filename: Name of the uploaded file.

    Returns:
        str: ``uploads/<uuid4><ext>``, where ``<ext>`` includes its leading
        dot and is empty when ``filename`` has no extension.
    """
    # splitext only looks at the last path component, so a name without a dot
    # yields no extension (instead of the whole name) and path separators can
    # never end up inside the extension.
    ext = os.path.splitext(filename)[1]
    return os.path.join('uploads', f"{uuid.uuid4()}{ext}")
|
||||||
|
|
||||||
|
class AudioFile(models.Model):
    """Uploaded audio file together with the text produced from it."""

    # Stored under the path produced by get_file_path.
    file = models.FileField(upload_to=get_file_path)
    # Set automatically when the row is created.
    uploaded_at = models.DateTimeField(auto_now_add=True)
    # Optional; may be empty or NULL.
    processed_text = models.TextField(blank=True, null=True)

    def __str__(self):
        """Return a short label containing the id and the upload time."""
        return f"Audio {self.id} - {self.uploaded_at}"
|
||||||
|
|
||||||
|
class DocumentFile(models.Model):
    """Uploaded document together with the text produced from it."""

    # Stored under the path produced by get_file_path.
    file = models.FileField(upload_to=get_file_path)
    # Set automatically when the row is created.
    uploaded_at = models.DateTimeField(auto_now_add=True)
    # Optional; may be empty or NULL.
    processed_text = models.TextField(blank=True, null=True)

    def __str__(self):
        """Return a short label containing the id and the upload time."""
        return f"Document {self.id} - {self.uploaded_at}"
|
||||||
2
api_app/services/__init__.py
Normal file
2
api_app/services/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
# api_project/api_app/services/__init__.py
|
||||||
|
# Пустой файл инициализации для Python-пакета
|
||||||
66
api_app/services/scan.py
Normal file
66
api_app/services/scan.py
Normal file
|
|
@ -0,0 +1,66 @@
|
||||||
|
import pdfplumber
|
||||||
|
import docx
|
||||||
|
import csv
|
||||||
|
import os
|
||||||
|
from io import StringIO, BytesIO
|
||||||
|
from PIL import Image
|
||||||
|
import base64
|
||||||
|
|
||||||
|
|
||||||
|
def extract_text_tables(file_path: str) -> str:
    """Extract text, tables and images from a PDF or DOCX file as an HTML fragment.

    Text becomes ``<p>`` elements, each table becomes a ``<pre>`` block of CSV,
    and each image becomes an ``<img>`` with a base64 data URI. For PDFs the
    pieces are emitted page by page; for DOCX all paragraphs come first, then
    all tables, then all images.

    Args:
        file_path: Path to the file. The type is chosen from the extension,
            compared case-insensitively.

    Returns:
        str: The HTML fragment, or ``""`` for any other extension.
    """
    # Local import keeps this fix self-contained.
    from html import escape

    def paragraph(text: str) -> str:
        # Document content is untrusted: escape it before embedding in markup.
        return f"<p>{escape(text, quote=False)}</p>"

    def table_block(rows) -> str:
        buffer = StringIO()
        csv.writer(buffer).writerows(rows)
        return f"<pre>{escape(buffer.getvalue(), quote=False)}</pre>"

    def image_tag(blob: bytes, mime: str = "image/png") -> str:
        encoded = base64.b64encode(blob).decode("ascii")
        return f'<img src="data:{mime};base64,{encoded}"/>'

    parts = []
    # Lower-case the extension so "REPORT.PDF" is handled like "report.pdf".
    extension = os.path.splitext(file_path)[1].lower()

    if extension == ".pdf":
        with pdfplumber.open(file_path) as pdf:
            for page in pdf.pages:
                text = page.extract_text()
                if text:
                    # One <p> per extracted line.
                    parts.extend(paragraph(line) for line in text.split("\n"))

                for table in page.extract_tables() or []:
                    parts.append(table_block(table))

                for img in page.images or []:
                    # NOTE(review): the bytes are labelled image/png as before;
                    # the real encoding presumably depends on the PDF image
                    # stream - confirm before relying on the data URI.
                    parts.append(image_tag(img["stream"].get_data()))

    elif extension == ".docx":
        doc = docx.Document(file_path)

        parts.extend(
            paragraph(para.text) for para in doc.paragraphs if para.text.strip()
        )

        for table in doc.tables:
            parts.append(table_block(
                [cell.text.strip() for cell in row.cells] for row in table.rows
            ))

        for rel in doc.part.rels.values():
            # External relationships (linked, not embedded, images) have no
            # target part to read bytes from.
            if rel.is_external or "image" not in rel.target_ref:
                continue
            part = rel.target_part
            # Use the part's declared content type rather than assuming PNG.
            mime = getattr(part, "content_type", None) or "image/png"
            parts.append(image_tag(part.blob, mime))

    return "".join(parts)
|
||||||
51
api_app/services/vosk_recognizer.py
Normal file
51
api_app/services/vosk_recognizer.py
Normal file
|
|
@ -0,0 +1,51 @@
|
||||||
|
import os, wave, vosk, ffmpeg
|
||||||
|
|
||||||
|
# Location of the Vosk model. The path is relative, so it is resolved against
# the working directory of the running process.
MODEL_PATH = r"models/vosk-model-small-ru-0.22"
# ffmpeg executable used for conversion to WAV.
# NOTE(review): this is a Windows .exe path - confirm for other platforms.
FFMPEG_PATH = r"models/ffmpeg/bin/ffmpeg.exe"
|
||||||
|
|
||||||
|
def convert_audio_to_wav(input_file, output_file, FFMPEG_PATH):
    """Convert an audio file to 16 kHz mono 16-bit PCM WAV with ffmpeg.

    Conversion errors reported by ffmpeg are printed, not raised.

    Args:
        input_file: Path of the source audio file.
        output_file: Path of the WAV file to write (overwritten if present).
        FFMPEG_PATH: Path of the ffmpeg executable to run.

    Returns:
        bool: True when ffmpeg finished successfully, False when it reported
        an error.
    """
    try:
        (
            ffmpeg
            .input(input_file)
            .output(output_file, format='wav', acodec='pcm_s16le', ar='16000', ac=1,
                    af='acompressor,afftdn,dynaudnorm,aresample=16000')  # 16 kHz for Vosk
            .global_args('-loglevel', 'quiet')
            # stderr must be captured, otherwise ffmpeg.Error.stderr is None
            # and the error handler below would itself fail.
            .run(cmd=FFMPEG_PATH, overwrite_output=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        # '-loglevel quiet' can leave stderr empty; fall back to the exception text.
        details = e.stderr.decode(errors="replace") if e.stderr else str(e)
        print("Ошибка при конвертации:", details)
        return False

    print(f"Конвертация завершена: {output_file}")
    return True
|
||||||
|
|
||||||
|
|
||||||
|
vosk.SetLogLevel(-1)
|
||||||
|
|
||||||
|
def recognize_speech(audio_path) -> str:
    """Transcribe an audio file with the Vosk model at MODEL_PATH.

    Input that is not already 16 kHz mono 16-bit PCM WAV is first converted
    into a private temporary file, which is removed afterwards.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        str: Recognized text of all utterances joined by spaces, or ``""``
        when the model directory does not exist.

    Raises:
        RuntimeError: If the conversion to WAV produced no output.
    """
    # Local imports keep this fix self-contained.
    import json
    import tempfile

    if not os.path.exists(MODEL_PATH):
        print("Ошибка: Модель не найдена!")
        return ""

    model = vosk.Model(MODEL_PATH)

    # Decide whether the input can be fed to Vosk as is.
    needs_conversion = os.path.splitext(audio_path)[1].lower() != ".wav"
    if not needs_conversion:
        try:
            with wave.open(audio_path, "rb") as wf:
                needs_conversion = (
                    wf.getnchannels() != 1
                    or wf.getsampwidth() != 2
                    or wf.getframerate() != 16000
                )
        except wave.Error:
            # A .wav the wave module cannot parse: let ffmpeg normalize it.
            needs_conversion = True

    temp_path = None
    try:
        if needs_conversion:
            # A unique temp file avoids clashes between concurrent requests
            # and never touches a user file that happens to be named audio.wav.
            fd, temp_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd)
            convert_audio_to_wav(audio_path, temp_path, FFMPEG_PATH)
            if os.path.getsize(temp_path) == 0:
                raise RuntimeError("Не удалось конвертировать аудио в WAV")
            audio_path = temp_path

        pieces = []
        with wave.open(audio_path, "rb") as wf:
            recognizer = vosk.KaldiRecognizer(model, wf.getframerate())
            while True:
                data = wf.readframes(3200)
                if not data:
                    break
                # True marks the end of an utterance; collect its result here
                # so the text is not lost when recognition continues.
                if recognizer.AcceptWaveform(data):
                    pieces.append(json.loads(recognizer.Result()).get("text", ""))
            # Whatever is still buffered after the last chunk.
            pieces.append(json.loads(recognizer.FinalResult()).get("text", ""))
    finally:
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)

    return " ".join(piece for piece in pieces if piece)
|
||||||
8
api_app/urls.py
Normal file
8
api_app/urls.py
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
# api_project/api_app/urls.py
|
||||||
|
from django.urls import path
|
||||||
|
from .views import AudioToTextView, DocumentToTextView
|
||||||
|
|
||||||
|
# Routes of the API app; both endpoints are class-based views.
urlpatterns = [
    # Handled by AudioToTextView.
    path('audio-to-text/', AudioToTextView.as_view(), name='audio-to-text'),
    # Handled by DocumentToTextView.
    path('document-to-text/', DocumentToTextView.as_view(), name='document-to-text'),
]
|
||||||
77
api_app/views.py
Normal file
77
api_app/views.py
Normal file
|
|
@ -0,0 +1,77 @@
|
||||||
|
# api_project/api_app/views.py
|
||||||
|
import os
|
||||||
|
from rest_framework import status
|
||||||
|
from rest_framework.views import APIView
|
||||||
|
from rest_framework.response import Response
|
||||||
|
from rest_framework.parsers import MultiPartParser, FormParser
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from .models import AudioFile, DocumentFile
|
||||||
|
from .services.vosk_recognizer import recognize_speech
|
||||||
|
from .services.scan import extract_text_tables
|
||||||
|
from .grpc_client.client import send_to_grpc_server
|
||||||
|
|
||||||
|
class AudioToTextView(APIView):
    """Accept an uploaded audio file, transcribe it and forward the text over gRPC."""

    parser_classes = (MultiPartParser, FormParser)

    def post(self, request, *args, **kwargs):
        """Handle a multipart upload whose ``audio`` field holds the file.

        Responds with 400 when the file is missing, 200 with the transcript
        and the gRPC result on success, and 500 with the error text when
        processing fails.
        """
        upload = request.FILES.get('audio')
        if not upload:
            missing = {'error': 'Нет аудио файла'}
            return Response(missing, status=status.HTTP_400_BAD_REQUEST)

        # Persist the upload first so the recognizer can read it from disk.
        record = AudioFile(file=upload)
        record.save()

        try:
            stored_path = os.path.join(settings.MEDIA_ROOT, record.file.name)
            transcript = recognize_speech(stored_path)

            record.processed_text = transcript
            record.save()

            payload = {
                'text': transcript,
                'grpc_response': send_to_grpc_server(transcript)
            }
            return Response(payload, status=status.HTTP_200_OK)
        except Exception as exc:
            failure = {'error': str(exc)}
            return Response(failure, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
||||||
|
|
||||||
|
class DocumentToTextView(APIView):
    """Accept an uploaded PDF/DOCX, extract its content and forward it over gRPC."""

    parser_classes = (MultiPartParser, FormParser)

    def post(self, request, *args, **kwargs):
        """Handle a multipart upload whose ``document`` field holds the file.

        Responds with 400 when the file is missing or is not a PDF/DOCX, 200
        with the extracted text and the gRPC result on success, and 500 with
        the error text when processing fails.
        """
        upload = request.FILES.get('document')
        if not upload:
            missing = {'error': 'Нет документа'}
            return Response(missing, status=status.HTTP_400_BAD_REQUEST)

        # Only the extension of the client-supplied name is checked.
        _, extension = os.path.splitext(upload.name)
        if extension.lower() not in ('.pdf', '.docx'):
            unsupported = {'error': 'Поддерживаются только PDF и DOCX файлы'}
            return Response(unsupported, status=status.HTTP_400_BAD_REQUEST)

        # Persist the upload first so the extractor can read it from disk.
        record = DocumentFile(file=upload)
        record.save()

        try:
            stored_path = os.path.join(settings.MEDIA_ROOT, record.file.name)
            extracted = extract_text_tables(stored_path)

            record.processed_text = extracted
            record.save()

            payload = {
                'text': extracted,
                'grpc_response': send_to_grpc_server(extracted)
            }
            return Response(payload, status=status.HTTP_200_OK)
        except Exception as exc:
            failure = {'error': str(exc)}
            return Response(failure, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
||||||
2
api_project/__init__.py
Normal file
2
api_project/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
# api_project/api_project/__init__.py
|
||||||
|
# Пустой файл инициализации для Python-пакета
|
||||||
69
api_project/settings.py
Normal file
69
api_project/settings.py
Normal file
|
|
@ -0,0 +1,69 @@
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
BASE_DIR = Path(__file__).resolve().parent.parent


def _env_flag(name, default):
    """Return environment variable *name* as a bool, or *default* when it is unset."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in ('1', 'true', 'yes', 'on')


# SECURITY: the fallback key is committed to the repository and is suitable
# for local development only. Set DJANGO_SECRET_KEY in any real deployment.
SECRET_KEY = os.environ.get('DJANGO_SECRET_KEY') or (
    'django-insecure-)+yykzv8cr7dbc38g2#x(8*ifs@+-f_fyan9!c%mmxg1$ekztq'
)

# Defaults to True as before; set DJANGO_DEBUG=0 to turn debug mode off.
DEBUG = _env_flag('DJANGO_DEBUG', True)

# Comma-separated host names in DJANGO_ALLOWED_HOSTS; empty list by default.
ALLOWED_HOSTS = [
    host.strip()
    for host in os.environ.get('DJANGO_ALLOWED_HOSTS', '').split(',')
    if host.strip()
]

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    # 'django.contrib.staticfiles',
    'rest_framework',  # Django REST framework
    'api_app',  # The project's API application
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'api_project.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'api_project.wsgi.application'

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': BASE_DIR / 'db.sqlite3',
    }
}

# The initial migration creates AutoField primary keys; pin the default so
# the models keep matching it regardless of the framework's own default.
DEFAULT_AUTO_FIELD = 'django.db.models.AutoField'

# Location of uploaded files
MEDIA_URL = '/media/'
MEDIA_ROOT = os.path.join(BASE_DIR, 'media')

# REST framework settings
REST_FRAMEWORK = {
    'DEFAULT_PERMISSION_CLASSES': [
        'rest_framework.permissions.AllowAny',  # For testing; restrict this in production
    ]
}
|
||||||
13
api_project/urls.py
Normal file
13
api_project/urls.py
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
# api_project/api_project/urls.py
|
||||||
|
from django.contrib import admin
|
||||||
|
from django.urls import path, include
|
||||||
|
from django.conf import settings
|
||||||
|
from django.conf.urls.static import static
|
||||||
|
|
||||||
|
# Project-level routes.
urlpatterns = [
    # Django admin site.
    path('admin/', admin.site.urls),
    # Every api_app route is mounted under the api/ prefix.
    path('api/', include('api_app.urls')),
]

# Add routes for MEDIA_URL backed by MEDIA_ROOT only when DEBUG is on.
if settings.DEBUG:
    urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
|
||||||
14
api_project/wsgi.py
Normal file
14
api_project/wsgi.py
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
# api_project/api_project/wsgi.py
|
||||||
|
"""
|
||||||
|
WSGI config for api_project project.
|
||||||
|
|
||||||
|
It exposes the WSGI callable as a module-level variable named ``application``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from django.core.wsgi import get_wsgi_application
|
||||||
|
|
||||||
|
# Use the project settings unless DJANGO_SETTINGS_MODULE is already set.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'api_project.settings')

# Module-level WSGI callable described in the module docstring.
application = get_wsgi_application()
|
||||||
BIN
db.sqlite3
Normal file
BIN
db.sqlite3
Normal file
Binary file not shown.
20
manage.py
Normal file
20
manage.py
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
"""Django's command-line utility for administrative tasks."""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run administrative tasks."""
    # Use the project settings unless the environment already names a module.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'api_project.settings')
    try:
        from django.core.management import execute_from_command_line as run_cli
    except ImportError as import_error:
        # Re-raise with a hint, keeping the original error as the cause.
        message = "Couldn't import Django. Are you sure it's installed?"
        raise ImportError(message) from import_error
    run_cli(sys.argv)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
BIN
media/uploads/audio.ogg
Normal file
BIN
media/uploads/audio.ogg
Normal file
Binary file not shown.
BIN
media/uploads/Деев Е.В. Резюме.pdf
Normal file
BIN
media/uploads/Деев Е.В. Резюме.pdf
Normal file
Binary file not shown.
18
proto/text_service.proto
Normal file
18
proto/text_service.proto
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
// proto/text_service.proto
|
||||||
|
syntax = "proto3";

package text_service;

// Text-processing service.
service TextProcessor {
  // Unary call: one TextRequest in, one TextResponse out.
  rpc ProcessText(TextRequest) returns (TextResponse) {}
}

// Request carrying the text to process.
message TextRequest {
  // Text to process.
  string text = 1;
}

// Result of a ProcessText call.
message TextResponse {
  // Processed text.
  string processed_text = 1;
  // Success flag. NOTE(review): presumably false when `error` is set - confirm with the server implementation.
  bool success = 2;
  // Error description; the Python client treats an empty value as "no error".
  string error = 3;
}
|
||||||
40
proto/text_service_pb2.py
Normal file
40
proto/text_service_pb2.py
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||||
|
# NO CHECKED-IN PROTOBUF GENCODE
|
||||||
|
# source: text_service.proto
|
||||||
|
# Protobuf Python Version: 5.29.0
|
||||||
|
"""Generated protocol buffer code."""
|
||||||
|
from google.protobuf import descriptor as _descriptor
|
||||||
|
from google.protobuf import descriptor_pool as _descriptor_pool
|
||||||
|
from google.protobuf import runtime_version as _runtime_version
|
||||||
|
from google.protobuf import symbol_database as _symbol_database
|
||||||
|
from google.protobuf.internal import builder as _builder
|
||||||
|
_runtime_version.ValidateProtobufRuntimeVersion(
|
||||||
|
_runtime_version.Domain.PUBLIC,
|
||||||
|
5,
|
||||||
|
29,
|
||||||
|
0,
|
||||||
|
'',
|
||||||
|
'text_service.proto'
|
||||||
|
)
|
||||||
|
# @@protoc_insertion_point(imports)
|
||||||
|
|
||||||
|
_sym_db = _symbol_database.Default()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x12text_service.proto\x12\x0ctext_service\"\x1b\n\x0bTextRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\"F\n\x0cTextResponse\x12\x16\n\x0eprocessed_text\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\x12\r\n\x05\x65rror\x18\x03 \x01(\t2W\n\rTextProcessor\x12\x46\n\x0bProcessText\x12\x19.text_service.TextRequest\x1a\x1a.text_service.TextResponse\"\x00\x62\x06proto3')
|
||||||
|
|
||||||
|
_globals = globals()
|
||||||
|
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
||||||
|
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'text_service_pb2', _globals)
|
||||||
|
if not _descriptor._USE_C_DESCRIPTORS:
|
||||||
|
DESCRIPTOR._loaded_options = None
|
||||||
|
_globals['_TEXTREQUEST']._serialized_start=36
|
||||||
|
_globals['_TEXTREQUEST']._serialized_end=63
|
||||||
|
_globals['_TEXTRESPONSE']._serialized_start=65
|
||||||
|
_globals['_TEXTRESPONSE']._serialized_end=135
|
||||||
|
_globals['_TEXTPROCESSOR']._serialized_start=137
|
||||||
|
_globals['_TEXTPROCESSOR']._serialized_end=224
|
||||||
|
# @@protoc_insertion_point(module_scope)
|
||||||
97
proto/text_service_pb2_grpc.py
Normal file
97
proto/text_service_pb2_grpc.py
Normal file
|
|
@ -0,0 +1,97 @@
|
||||||
|
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
||||||
|
"""Client and server classes corresponding to protobuf-defined services."""
|
||||||
|
import grpc
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
import text_service_pb2 as text__service__pb2
|
||||||
|
|
||||||
|
GRPC_GENERATED_VERSION = '1.71.0'
|
||||||
|
GRPC_VERSION = grpc.__version__
|
||||||
|
_version_not_supported = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
from grpc._utilities import first_version_is_lower
|
||||||
|
_version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
|
||||||
|
except ImportError:
|
||||||
|
_version_not_supported = True
|
||||||
|
|
||||||
|
if _version_not_supported:
|
||||||
|
raise RuntimeError(
|
||||||
|
f'The grpc package installed is at version {GRPC_VERSION},'
|
||||||
|
+ f' but the generated code in text_service_pb2_grpc.py depends on'
|
||||||
|
+ f' grpcio>={GRPC_GENERATED_VERSION}.'
|
||||||
|
+ f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
|
||||||
|
+ f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TextProcessorStub(object):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
|
||||||
|
def __init__(self, channel):
|
||||||
|
"""Constructor.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
channel: A grpc.Channel.
|
||||||
|
"""
|
||||||
|
self.ProcessText = channel.unary_unary(
|
||||||
|
'/text_service.TextProcessor/ProcessText',
|
||||||
|
request_serializer=text__service__pb2.TextRequest.SerializeToString,
|
||||||
|
response_deserializer=text__service__pb2.TextResponse.FromString,
|
||||||
|
_registered_method=True)
|
||||||
|
|
||||||
|
|
||||||
|
class TextProcessorServicer(object):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
|
||||||
|
def ProcessText(self, request, context):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||||
|
context.set_details('Method not implemented!')
|
||||||
|
raise NotImplementedError('Method not implemented!')
|
||||||
|
|
||||||
|
|
||||||
|
def add_TextProcessorServicer_to_server(servicer, server):
|
||||||
|
rpc_method_handlers = {
|
||||||
|
'ProcessText': grpc.unary_unary_rpc_method_handler(
|
||||||
|
servicer.ProcessText,
|
||||||
|
request_deserializer=text__service__pb2.TextRequest.FromString,
|
||||||
|
response_serializer=text__service__pb2.TextResponse.SerializeToString,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
generic_handler = grpc.method_handlers_generic_handler(
|
||||||
|
'text_service.TextProcessor', rpc_method_handlers)
|
||||||
|
server.add_generic_rpc_handlers((generic_handler,))
|
||||||
|
server.add_registered_method_handlers('text_service.TextProcessor', rpc_method_handlers)
|
||||||
|
|
||||||
|
|
||||||
|
# This class is part of an EXPERIMENTAL API.
|
||||||
|
class TextProcessor(object):
|
||||||
|
"""Missing associated documentation comment in .proto file."""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def ProcessText(request,
|
||||||
|
target,
|
||||||
|
options=(),
|
||||||
|
channel_credentials=None,
|
||||||
|
call_credentials=None,
|
||||||
|
insecure=False,
|
||||||
|
compression=None,
|
||||||
|
wait_for_ready=None,
|
||||||
|
timeout=None,
|
||||||
|
metadata=None):
|
||||||
|
return grpc.experimental.unary_unary(
|
||||||
|
request,
|
||||||
|
target,
|
||||||
|
'/text_service.TextProcessor/ProcessText',
|
||||||
|
text__service__pb2.TextRequest.SerializeToString,
|
||||||
|
text__service__pb2.TextResponse.FromString,
|
||||||
|
options,
|
||||||
|
channel_credentials,
|
||||||
|
insecure,
|
||||||
|
call_credentials,
|
||||||
|
compression,
|
||||||
|
wait_for_ready,
|
||||||
|
timeout,
|
||||||
|
metadata,
|
||||||
|
_registered_method=True)
|
||||||
9
requirements.txt
Normal file
9
requirements.txt
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
django==4.2.6
|
||||||
|
djangorestframework==3.14.0
|
||||||
|
grpcio==1.58.0
|
||||||
|
grpcio-tools==1.58.0
|
||||||
|
pdfplumber==0.10.2
|
||||||
|
python-docx==0.8.11
|
||||||
|
Pillow==10.0.1
|
||||||
|
vosk==0.3.45
|
||||||
|
ffmpeg-python==0.2.0
|
||||||
13
тесты.txt
Normal file
13
тесты.txt
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
python manage.py runserver
|
||||||
|
|
||||||
|
------------------------------------------
|
||||||
|
|
||||||
|
Тест расширения для сканирования:
|
||||||
|
|
||||||
|
curl -X POST -F "document=@C:\Users\egord\Desktop\API EasyAccess\api_project\media\uploads\Деев Е.В. Резюме.pdf" http://localhost:8000/api/document-to-text/
|
||||||
|
|
||||||
|
------------------------------------------
|
||||||
|
|
||||||
|
Тест для аудио транскрипции:
|
||||||
|
|
||||||
|
curl -X POST -F audio=@"C:\Users\egord\Desktop\API EasyAccess\api_project\media\uploads\audio.ogg" http://localhost:8000/api/audio-to-text/
|
||||||
Loading…
Add table
Reference in a new issue