v. 1.0

2026-06-15 11:01:05 +03:00 · 2025-09-26 01:40:17 +03:00 · 2025-09-26 01:40:17 +03:00 · 6edefc4a8e
commit 6edefc4a8e
parent 0c93340da9
3 changed files with 970 additions and 0 deletions
--- a/bot.py
+++ b/bot.py
@ -0,0 +1,183 @@
 import asyncio
 import logging
 import os
 import shutil
 import zipfile
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from aiogram import Bot, Dispatcher, F, Router
 from aiogram.types import Message, BufferedInputFile
 from aiogram.filters import Command
 from aiogram.enums.parse_mode import ParseMode
 from aiogram.fsm.storage.memory import MemoryStorage
 from aiogram.client.bot import DefaultBotProperties
 # Импорт наших конвертеров
 from md_to_docx import MarkdownToDocxConverter, DocumentSettings
 from rep_to_txt import generate_complete_project_structure
 # Configuration.
 # The token is taken from the BOT_TOKEN environment variable so that the
 # secret does not have to be committed; the literal is kept only as a
 # backward-compatible fallback (bot account: @my_convbot).
 BOT_TOKEN = os.environ.get("BOT_TOKEN") or "**************************"
 # Bot initialisation: HTML is the default parse mode for outgoing messages.
 bot = Bot(token=BOT_TOKEN, default=DefaultBotProperties(parse_mode=ParseMode.HTML))
 dp = Dispatcher(storage=MemoryStorage())
 router = Router()
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@router.message(Command("start"))
 async def start_handler(msg: Message) -> None:
    """Reply with the greeting and a short overview of what the bot converts."""
    greeting = (
        "<b>Приветствую в своём Конвертере!</b>\n\n"
        "📋 <b>Возможности:</b>\n"
        "• Отправьте .md файл → получите .docx\n"
        "• Отправьте .zip архив → получите структуру проекта в .txt\n\n"
        "📝 <b>/help</b> - Для подробной информации"
    )
    await msg.answer(greeting)
@router.message(Command("help"))
 async def help_handler(msg: Message) -> None:
    """Reply with the detailed guide: supported conversions and limits."""
    await msg.answer(
        "<b>📚 Подробное руководство</b>\n\n"
        "<b>1. Конвертация Markdown → DOCX:</b>\n"
        "• Отправьте .md файл\n"
        "• Получите DOCX с форматированием по ГОСТ\n\n"
        "<b>2. Анализ архива → TXT:</b>\n"
        "• Отправьте .zip архив\n"
        "• Получите полную структуру проекта в текстовом файле\n\n"
        "<b>⚡ Ограничения:</b>\n"
        "• Размер файла: до 20 МБ\n"
        "• Поддерживаемые форматы: .md, .zip"
    )
@router.message(F.document)
 async def handle_document(msg: Message) -> None:
    """Convert an uploaded document and send the result back to the chat.

    ``.md`` files are converted to DOCX, ``.zip`` archives are turned into a
    text report of the project structure. Files above 20 MB and any other
    extension are rejected with a message. Every failure during processing is
    logged with its traceback and reported to the user.
    """
    import html  # local import: only needed to escape error text in HTML replies

    document = msg.document
    # The name and the size are optional in an incoming document, so fall
    # back to neutral values instead of failing on None. Only the last path
    # component is kept: the name is user-controlled and must not be able to
    # point outside the temporary directory.
    file_name = Path(document.file_name or "").name
    file_size = document.file_size or 0
    if file_size > 20 * 1024 * 1024:
        await msg.answer("❌ Файл слишком большой! Максимум 20 МБ")
        return
    file_ext = Path(file_name).suffix.lower()
    status_msg = await msg.answer("⏳ Обрабатываю файл...")
    # Reject unsupported formats before spending time on the download.
    if file_ext not in ('.md', '.zip'):
        await status_msg.edit_text("❌ Неподдерживаемый формат файла!")
        return
    try:
        with TemporaryDirectory() as temp_dir:
            # Download the upload into the temporary directory.
            file_info = await bot.get_file(document.file_id)
            input_path = os.path.join(temp_dir, file_name)
            await bot.download_file(file_info.file_path, input_path)
            if file_ext == '.md':
                # Markdown -> DOCX
                output_path = await convert_md_to_docx(input_path, temp_dir)
                output_name = Path(file_name).stem + '.docx'
            else:
                # Archive -> TXT report
                output_path = await analyze_archive(input_path, temp_dir, file_ext)
                output_name = Path(file_name).stem + '_structure.txt'
            # Read the result into memory: the temporary directory is removed
            # as soon as this ``with`` block ends.
            with open(output_path, 'rb') as output_file:
                result_file = BufferedInputFile(
                    output_file.read(),
                    filename=output_name
                )
            await msg.answer_document(result_file)
            await status_msg.edit_text("✅ Конвертация завершена!")
    except Exception as e:
        logger.exception("Ошибка обработки файла: %s", e)
        # Replies are parsed as HTML, so the exception text must be escaped;
        # otherwise a "<" in the message would make the edit itself fail.
        await status_msg.edit_text(f"❌ Ошибка обработки: {html.escape(str(e))}")
 async def convert_md_to_docx(md_path: str, temp_dir: str) -> str:
    """Convert the Markdown file at *md_path* into ``output.docx`` in *temp_dir*.

    The conversion itself is synchronous, so it runs in the default executor
    to keep the event loop free for other updates while it works.

    Returns:
        Path of the generated DOCX file.
    """
    output_path = os.path.join(temp_dir, "output.docx")
    # Formatting options used for documents produced by the bot.
    settings = DocumentSettings()
    settings.font_name = "Times New Roman"
    settings.font_size = 14
    settings.line_spacing = 1.5
    settings.margin_left = 3.0
    settings.auto_numbering_headings = True

    def _convert() -> None:
        converter = MarkdownToDocxConverter(settings)
        converter.convert(md_path, output_path)

    await asyncio.get_running_loop().run_in_executor(None, _convert)
    return output_path
 async def analyze_archive(
    archive_path: str,
    temp_dir: str,
    file_ext: str,
    max_uncompressed_bytes: int = 200 * 1024 * 1024,
 ) -> str:
    """Unpack a zip archive and write a text report of the project inside it.

    Args:
        archive_path: Path of the uploaded ``.zip`` file.
        temp_dir: Working directory; the archive is extracted into
            ``extracted/`` and the report is written next to it.
        file_ext: Extension of the upload (currently unused, kept for callers).
        max_uncompressed_bytes: Upper bound for the total size the archive
            declares for its members; larger archives are refused.

    Returns:
        Path of the generated ``project_structure.txt``.

    Raises:
        ValueError: If the declared uncompressed size exceeds the limit.
    """

    def _build_report() -> str:
        extract_dir = os.path.join(temp_dir, "extracted")
        os.makedirs(extract_dir, exist_ok=True)
        with zipfile.ZipFile(archive_path, 'r') as zip_ref:
            # The archive comes from an untrusted user: a small upload can
            # declare gigabytes of content, so check before extracting.
            total_size = sum(info.file_size for info in zip_ref.infolist())
            if total_size > max_uncompressed_bytes:
                raise ValueError(
                    f"Архив слишком большой после распаковки: {total_size} байт "
                    f"(максимум {max_uncompressed_bytes})"
                )
            zip_ref.extractall(extract_dir)
        # An archive holding a single top-level folder is reported from that
        # folder; otherwise the extraction directory is the project root.
        extracted_items = os.listdir(extract_dir)
        project_root = extract_dir
        if len(extracted_items) == 1:
            only_item = os.path.join(extract_dir, extracted_items[0])
            if os.path.isdir(only_item):
                project_root = only_item
        structure = generate_complete_project_structure(project_root)
        output_path = os.path.join(temp_dir, "project_structure.txt")
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(structure)
        return output_path

    # Extraction and scanning are blocking; keep them off the event loop.
    return await asyncio.get_running_loop().run_in_executor(None, _build_report)
@router.message()
 async def handle_other_messages(msg: Message) -> None:
    """Answer any other message with a short hint about what to send."""
    hint = (
        "<b>Отправьте файл для конвертации:</b>\n"
        "• .md файл для конвертации в DOCX\n"
        "• .zip архив для анализа структуры\n\n"
        "Используйте /help для подробной справки!"
    )
    await msg.answer(hint)
 async def main() -> None:
    """Register the router, start long polling and close the session on exit."""
    try:
        logger.info("Запуск File Converter Bot...")
        dp.include_router(router)
        # Updates that arrived while the bot was offline are discarded.
        await bot.delete_webhook(drop_pending_updates=True)
        logger.info("Bot started successfully")
        await dp.start_polling(bot)
    except Exception as e:
        # exc_info keeps the traceback; the message alone rarely shows where
        # a startup or polling failure came from.
        logger.critical(f"Критическая ошибка: {e}", exc_info=True)
    finally:
        await bot.session.close()
 if __name__ == "__main__":
    # Script entry point: run the bot until it is interrupted.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        logger.info("Bot stopped by user")
    except Exception as e:
        # Keep the traceback in the log so the failure can be diagnosed.
        logger.critical(f"Unhandled exception: {e}", exc_info=True)
--- a/md_to_docx.py
+++ b/md_to_docx.py
@ -0,0 +1,622 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import re
 import sys
 from pathlib import Path
 from docx import Document
 from docx.shared import Inches, Pt, RGBColor, Cm
 from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING
 from docx.enum.style import WD_STYLE_TYPE
 from docx.enum.section import WD_SECTION
 from docx.oxml.shared import OxmlElement, qn
 from docx.oxml.ns import nsdecls
 from docx.oxml import parse_xml
 class DocumentSettings:
    """Formatting options for the generated DOCX document (GOST-oriented defaults).

    Every option is a plain instance attribute. It can be changed after
    construction or passed as a keyword argument, for example
    ``DocumentSettings(font_size=12, auto_numbering_headings=True)``.

    Raises:
        TypeError: If a keyword argument does not name an existing option.
    """
    def __init__(self, **overrides):
        # Base text settings
        self.font_name = "Times New Roman"
        self.font_size = 14  # body text, points
        self.line_spacing = 1.5  # line-height multiple
        self.justify_text = True
        self.paragraph_spacing = 6  # space after a paragraph, points
        self.text_color = (0, 0, 0)  # RGB
        self.paragraph_indent = 1.25  # first-line indent, centimetres
        # Page margins in centimetres (GOST 7.32-2017)
        self.margin_top = 2.0
        self.margin_bottom = 2.0
        self.margin_left = 3.0    # wider to leave room for binding
        self.margin_right = 1.5
        # Heading font sizes in points
        self.heading1_font_size = 16  # chapter headings
        self.heading2_font_size = 14  # section headings
        self.heading3_font_size = 14  # sub-headings
        self.heading4_font_size = 14
        self.heading5_font_size = 12
        self.heading6_font_size = 12
        self.footnote_font_size = 10
        # Spacing around headings and paragraphs, points
        self.heading_spacing_before = 12
        self.heading_spacing_after = 6
        self.paragraph_spacing_before = 0
        # Page numbering
        self.page_numbering = True
        self.page_number_position = "bottom_center"  # top_right, bottom_center, bottom_right
        self.page_number_start = 1
        self.exclude_title_page_numbering = True
        # Automatic heading numbering
        self.auto_numbering_headings = False
        self.numbering_format = "decimal"  # "decimal" (1.1.1) or "simple" (1)
        # Additional GOST-related options
        self.bibliography_style = "gost"
        self.table_caption_position = "above"  # above, below
        self.figure_caption_position = "below"
        # Keyword overrides are applied last. Unknown names are rejected so a
        # typo cannot silently create an attribute nothing reads.
        for option, value in overrides.items():
            if not hasattr(self, option):
                raise TypeError(f"Unknown document setting: {option!r}")
            setattr(self, option, value)
 class MarkdownToDocxConverter:
    """Convert a Markdown file into a DOCX document formatted per the settings.

    The converter works line by line and understands ATX headings, fenced
    code blocks, pipe tables, bullet and numbered lists, block quotes,
    horizontal rules, ``[^n]`` footnotes and bold / italic / inline-code spans.

    The document is created in ``__init__`` and ``convert`` appends to it, so
    a converter instance is meant to be used for a single input file.
    """

    # Heading that announces the list of references.
    _BIBLIOGRAPHY_HEADING = re.compile(
        r'^#+\s*(список\s+литературы|bibliography|references)', re.IGNORECASE
    )
    # One inline span: bold, italic, inline code or a footnote placeholder.
    _INLINE_SPAN = re.compile(r'(\*\*.*?\*\*|\*.*?\*|`.*?`|\{FOOTNOTE_\d+\})')

    def __init__(self, settings: "DocumentSettings | None" = None):
        """Create an empty document and apply margins, page numbers and styles.

        Args:
            settings: Formatting options; defaults are used when omitted.
        """
        self.settings = settings or DocumentSettings()
        self.doc = Document()
        # Counters used for automatic numbering.
        self.heading_counters = [0] * 6  # one counter per heading level
        self.footnote_counter = 0
        self.table_counter = 0
        self.figure_counter = 0
        self.setup_document_margins()
        self.setup_page_numbering()
        self.setup_styles()

    def setup_document_margins(self):
        """Apply the configured page margins (centimetres) to every section."""
        for section in self.doc.sections:
            section.top_margin = Cm(self.settings.margin_top)
            section.bottom_margin = Cm(self.settings.margin_bottom)
            section.left_margin = Cm(self.settings.margin_left)
            section.right_margin = Cm(self.settings.margin_right)

    def setup_page_numbering(self):
        """Insert a page-number field into the header or footer.

        The position comes from ``page_number_position``; an unknown value
        leaves the document untouched. When ``exclude_title_page_numbering``
        is set, the first page gets its own (empty) header and footer so no
        number is printed on it.

        NOTE: ``page_number_start`` is not applied here.
        """
        if not self.settings.page_numbering:
            return
        section = self.doc.sections[0]
        position = self.settings.page_number_position
        if position == "bottom_center":
            container, alignment = section.footer, WD_ALIGN_PARAGRAPH.CENTER
        elif position == "top_right":
            container, alignment = section.header, WD_ALIGN_PARAGRAPH.RIGHT
        elif position == "bottom_right":
            container, alignment = section.footer, WD_ALIGN_PARAGRAPH.RIGHT
        else:
            return
        paragraph = container.paragraphs[0]
        paragraph.alignment = alignment
        # Aligning the paragraph is not enough: the number only appears when
        # the paragraph contains a PAGE field.
        self._append_page_number_field(paragraph)
        if self.settings.exclude_title_page_numbering:
            section.different_first_page_header_footer = True

    @staticmethod
    def _append_page_number_field(paragraph):
        """Append a PAGE field (begin / instruction / end) to *paragraph*."""
        run = paragraph.add_run()
        field_begin = OxmlElement('w:fldChar')
        field_begin.set(qn('w:fldCharType'), 'begin')
        instruction = OxmlElement('w:instrText')
        instruction.set(qn('xml:space'), 'preserve')
        instruction.text = 'PAGE'
        field_end = OxmlElement('w:fldChar')
        field_end.set(qn('w:fldCharType'), 'end')
        for element in (field_begin, instruction, field_end):
            run._r.append(element)

    def _get_or_add_style(self, name, style_type):
        """Return the style called *name*, creating it if the document lacks it."""
        styles = self.doc.styles
        if name in styles:
            return styles[name]
        return styles.add_style(name, style_type)

    def _apply_font(self, font, name, size):
        """Set font family, size in points and the configured text colour."""
        font.name = name
        font.size = Pt(size)
        font.color.rgb = RGBColor(*self.settings.text_color)

    def setup_styles(self):
        """Configure the Normal, heading, footnote, code and caption styles.

        Styles that already exist in the document are reconfigured instead of
        being skipped, so the settings apply to them as well.
        """
        settings = self.settings
        # Base style
        normal_style = self.doc.styles['Normal']
        self._apply_font(normal_style.font, settings.font_name, settings.font_size)
        normal_paragraph = normal_style.paragraph_format
        normal_paragraph.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
        normal_paragraph.line_spacing = settings.line_spacing
        normal_paragraph.space_after = Pt(settings.paragraph_spacing)
        normal_paragraph.space_before = Pt(settings.paragraph_spacing_before)
        normal_paragraph.first_line_indent = Cm(settings.paragraph_indent)
        if settings.justify_text:
            normal_paragraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
        # Heading styles, each level with its own font size
        heading_sizes = (
            settings.heading1_font_size,
            settings.heading2_font_size,
            settings.heading3_font_size,
            settings.heading4_font_size,
            settings.heading5_font_size,
            settings.heading6_font_size,
        )
        for level, size in enumerate(heading_sizes, start=1):
            heading_style = self._get_or_add_style(f'Heading {level}', WD_STYLE_TYPE.PARAGRAPH)
            self._apply_font(heading_style.font, settings.font_name, size)
            heading_style.font.bold = True
            heading_paragraph = heading_style.paragraph_format
            heading_paragraph.space_before = Pt(settings.heading_spacing_before)
            heading_paragraph.space_after = Pt(settings.heading_spacing_after)
            # Levels 1 and 2 are centred, deeper levels keep the paragraph indent.
            if level <= 2:
                heading_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
                heading_paragraph.first_line_indent = Cm(0)
            else:
                if settings.justify_text:
                    heading_paragraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
                heading_paragraph.first_line_indent = Cm(settings.paragraph_indent)
        # Footnote text
        footnote_style = self._get_or_add_style('Footnote', WD_STYLE_TYPE.PARAGRAPH)
        self._apply_font(footnote_style.font, settings.font_name, settings.footnote_font_size)
        footnote_paragraph = footnote_style.paragraph_format
        footnote_paragraph.space_before = Pt(3)
        footnote_paragraph.space_after = Pt(3)
        footnote_paragraph.first_line_indent = Cm(0.5)
        # Inline code (character style)
        code_style = self._get_or_add_style('Code', WD_STYLE_TYPE.CHARACTER)
        self._apply_font(code_style.font, 'Courier New', settings.font_size)
        # Code blocks (paragraph style)
        code_block_style = self._get_or_add_style('Code Block', WD_STYLE_TYPE.PARAGRAPH)
        self._apply_font(code_block_style.font, 'Courier New', settings.font_size)
        code_block_paragraph = code_block_style.paragraph_format
        code_block_paragraph.left_indent = Inches(0.5)
        code_block_paragraph.first_line_indent = Cm(0)  # code has no first-line indent
        code_block_paragraph.space_before = Pt(6)
        code_block_paragraph.space_after = Pt(6)
        # Captions for tables and figures, two points smaller than body text
        caption_style = self._get_or_add_style('Caption', WD_STYLE_TYPE.PARAGRAPH)
        self._apply_font(caption_style.font, settings.font_name, settings.font_size - 2)
        caption_paragraph = caption_style.paragraph_format
        caption_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        caption_paragraph.space_before = Pt(6)
        caption_paragraph.space_after = Pt(6)

    def generate_heading_number(self, level: int) -> str:
        """Return the numbering prefix for the next heading of *level* (1-6).

        Returns an empty string when automatic numbering is disabled. The
        counter of *level* is incremented and all deeper counters are reset.
        In the "decimal" format levels whose counter is still zero are left
        out of the prefix.
        """
        if not self.settings.auto_numbering_headings:
            return ""
        self.heading_counters[level - 1] += 1
        for deeper in range(level, 6):
            self.heading_counters[deeper] = 0
        if self.settings.numbering_format == "simple":
            return f"{self.heading_counters[level - 1]}. "
        numbers = [str(count) for count in self.heading_counters[:level] if count > 0]
        return ".".join(numbers) + ". " if numbers else ""

    def parse_markdown_file(self, file_path: str):
        """Read the Markdown file and return its text.

        The file is decoded as UTF-8; a leading byte-order mark is dropped so
        that it cannot hide a heading or list marker on the first line.

        Raises:
            Exception: If the file cannot be read; the original error is chained.
        """
        try:
            with open(file_path, 'r', encoding='utf-8-sig') as file:
                return file.read()
        except Exception as e:
            raise Exception(f"Ошибка чтения файла: {e}") from e

    def add_text_run_with_color(self, paragraph, text, bold=False, italic=False, code_style=False):
        """Append *text* to *paragraph* in the configured colour and return the run."""
        run = paragraph.add_run(text)
        run.font.color.rgb = RGBColor(*self.settings.text_color)
        if bold:
            run.font.bold = True
        if italic:
            run.font.italic = True
        if code_style:
            run.style = 'Code'
        return run

    def process_text_formatting(self, text: str, paragraph):
        """Append *text* to *paragraph*, rendering inline Markdown spans.

        ``**bold**``, ``*italic*`` and `` `code` `` become formatted runs and
        footnote references ``[^1]`` become superscript numbers.
        """
        # Footnote references are turned into placeholders first so that the
        # span splitter below treats each one as a single token.
        text = re.sub(r'\[\^(\d+)\]', r'{FOOTNOTE_\1}', text)
        for part in self._INLINE_SPAN.split(text):
            if not part:
                continue
            # The length checks keep a lone "*" or "`" from being read as an
            # empty span and disappearing from the output.
            if len(part) >= 4 and part.startswith('**') and part.endswith('**'):
                self.add_text_run_with_color(paragraph, part[2:-2], bold=True)
            elif len(part) >= 2 and part.startswith('*') and part.endswith('*'):
                self.add_text_run_with_color(paragraph, part[1:-1], italic=True)
            elif len(part) >= 2 and part.startswith('`') and part.endswith('`'):
                self.add_text_run_with_color(paragraph, part[1:-1], code_style=True)
            elif part.startswith('{FOOTNOTE_') and part.endswith('}'):
                footnote_num = re.search(r'FOOTNOTE_(\d+)', part).group(1)
                run = self.add_text_run_with_color(paragraph, footnote_num)
                run.font.superscript = True
            else:
                self.add_text_run_with_color(paragraph, part)

    @staticmethod
    def _parse_list_item(raw_line):
        """Return ``(kind, text, level)`` for a list line, or None otherwise.

        *kind* is 'bullet' or 'number'. *level* is 1 when the marker is
        indented by at least two columns and 0 otherwise; the indentation is
        measured on the raw line, before it is stripped.
        """
        expanded = raw_line.expandtabs(4)
        body = expanded.strip()
        if re.match(r'^[-*+]\s', body):
            kind, text = 'bullet', re.sub(r'^[-*+]\s', '', body)
        elif re.match(r'^\d+\.\s', body):
            kind, text = 'number', re.sub(r'^\d+\.\s', '', body)
        else:
            return None
        indent = len(expanded) - len(expanded.lstrip())
        return kind, text, (1 if indent >= 2 else 0)

    def process_list(self, lines: list, start_idx: int):
        """Render the list starting at *start_idx*.

        Blank lines inside the list are skipped; the first non-blank line
        that is not a list item ends it.

        Returns:
            Index of the last line consumed.
        """
        i = start_idx
        list_items = []
        while i < len(lines):
            raw_line = lines[i]
            if raw_line.strip():
                item = self._parse_list_item(raw_line)
                if item is None:
                    break
                list_items.append(item)
            i += 1
        for list_type, text, level in list_items:
            paragraph = self.doc.add_paragraph()
            paragraph.paragraph_format.left_indent = Cm(level * 0.75)  # extra indent per nesting level
            paragraph.paragraph_format.first_line_indent = Cm(self.settings.paragraph_indent)
            if list_type == 'bullet':
                paragraph.style = 'List Bullet'
                # Items are introduced with an en dash.
                # NOTE(review): 'List Bullet' may also draw its own bullet
                # next to the dash - confirm the intended look.
                paragraph.add_run().text = "– "
            else:
                paragraph.style = 'List Number'
            self.process_text_formatting(text, paragraph)
        return i - 1

    @staticmethod
    def _split_table_row(line):
        """Split a table row into stripped cells; outer pipes are optional."""
        row = line.strip()
        if row.startswith('|'):
            row = row[1:]
        if row.endswith('|'):
            row = row[:-1]
        return [cell.strip() for cell in row.split('|')]

    @staticmethod
    def _is_table_separator(line):
        """Tell whether *line* is a header separator such as ``|---|:-:|``."""
        return re.fullmatch(r'[\s|:-]+', line) is not None and '-' in line

    def _add_table_caption(self):
        """Add the next numbered table caption paragraph."""
        self.table_counter += 1
        caption_para = self.doc.add_paragraph()
        caption_para.style = 'Caption'
        caption_para.add_run(f"Таблица {self.table_counter}")

    def process_table(self, lines: list, start_idx: int):
        """Render the pipe table starting at *start_idx*.

        The table ends at the first line without a ``|`` (a blank line
        included). A single line containing ``|`` is not a table and is
        rendered as an ordinary paragraph instead of being dropped.

        Returns:
            Index of the last line consumed.
        """
        i = start_idx
        table_lines = []
        while i < len(lines):
            line = lines[i].strip()
            if '|' not in line:
                break
            table_lines.append(line)
            i += 1
        if not table_lines:
            return start_idx
        if len(table_lines) < 2:
            paragraph = self.doc.add_paragraph()
            self.process_text_formatting(table_lines[0], paragraph)
            return start_idx
        if self.settings.table_caption_position == "above":
            self._add_table_caption()
        headers = self._split_table_row(table_lines[0])
        data_lines = table_lines[1:]
        # The separator row carries no data; any other second row does.
        if self._is_table_separator(data_lines[0]):
            data_lines = data_lines[1:]
        table = self.doc.add_table(rows=1, cols=len(headers))
        table.style = 'Table Grid'
        # Header row: centred and bold
        header_row = table.rows[0]
        for idx, header in enumerate(headers):
            cell = header_row.cells[idx]
            cell.text = header
            for paragraph in cell.paragraphs:
                paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
                for run in paragraph.runs:
                    run.font.bold = True
        # Data rows: centred; cells beyond the header width are ignored
        for line in data_lines:
            row = table.add_row()
            for idx, cell_data in enumerate(self._split_table_row(line)):
                if idx < len(row.cells):
                    row.cells[idx].text = cell_data
                    for paragraph in row.cells[idx].paragraphs:
                        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        if self.settings.table_caption_position == "below":
            self._add_table_caption()
        return i - 1

    def process_code_block(self, lines: list, start_idx: int):
        """Render the fenced code block whose opening fence is at *start_idx*.

        Returns:
            Index of the closing fence, or ``len(lines)`` if the block is
            never closed.
        """
        i = start_idx + 1
        code_lines = []
        while i < len(lines):
            line = lines[i]
            if line.strip().startswith('```'):
                break
            code_lines.append(line)
            i += 1
        code_paragraph = self.doc.add_paragraph()
        code_paragraph.style = 'Code Block'
        code_paragraph.add_run('\n'.join(code_lines))
        return i

    def add_footnote_definition(self, footnote_num: str, footnote_text: str):
        """Append one footnote: superscript number followed by its text."""
        footnote_para = self.doc.add_paragraph()
        footnote_para.style = 'Footnote'
        footnote_run = footnote_para.add_run(footnote_num)
        footnote_run.font.superscript = True
        footnote_para.add_run(f" {footnote_text}")

    def process_bibliography(self, lines: list, start_idx: int):
        """Render the numbered reference entries starting at *start_idx*.

        Blank lines are skipped; the first other line that is not a numbered
        entry ends the list. When at least one entry is found, a fixed
        heading and the renumbered entries are added to the document.

        Returns:
            Index of the last line consumed.
        """
        i = start_idx
        bib_items = []
        while i < len(lines):
            line = lines[i].strip()
            if re.match(r'^\d+\.\s', line):
                bib_items.append(re.sub(r'^\d+\.\s', '', line))
            elif line:
                break
            i += 1
        if bib_items:
            bib_heading = self.doc.add_paragraph()
            bib_heading.style = 'Heading 1'
            bib_heading.add_run("СПИСОК ЛИТЕРАТУРЫ")
            for idx, item in enumerate(bib_items, 1):
                bib_para = self.doc.add_paragraph()
                bib_para.paragraph_format.first_line_indent = Cm(0)
                bib_para.paragraph_format.left_indent = Cm(1)
                bib_para.add_run(f"{idx}. {item}")
        return i - 1

    @staticmethod
    def _starts_numbered_block(lines, start_idx):
        """Tell whether the first non-blank line from *start_idx* is ``N. text``."""
        for idx in range(start_idx, len(lines)):
            candidate = lines[idx].strip()
            if candidate:
                return re.match(r'^\d+\.\s', candidate) is not None
        return False

    def convert(self, md_file_path: str, output_path: str = None):
        """Convert *md_file_path* and save the document to *output_path*.

        Args:
            md_file_path: Markdown file to read.
            output_path: Destination; defaults to the input path with the
                ``.docx`` suffix.

        Returns:
            The path the document was saved to.
        """
        if not output_path:
            output_path = Path(md_file_path).with_suffix('.docx')
        content = self.parse_markdown_file(md_file_path)
        lines = content.split('\n')
        # Footnote definitions are collected here and rendered at the end.
        footnote_definitions = {}
        i = 0
        while i < len(lines):
            stripped_line = lines[i].strip()
            if not stripped_line:
                i += 1
                continue
            # Footnote definition: "[^1]: footnote text"
            footnote_def_match = re.match(r'^\[\^(\d+)\]:\s*(.+)', stripped_line)
            if footnote_def_match:
                footnote_definitions[footnote_def_match.group(1)] = footnote_def_match.group(2)
                i += 1
                continue
            heading_match = re.match(r'^(#{1,6})\s+(.+)', stripped_line)
            # The bibliography heading must be tested before generic headings,
            # otherwise it would always be rendered as an ordinary heading.
            # It is only taken when numbered entries follow, so the heading is
            # never lost.
            if (self._BIBLIOGRAPHY_HEADING.match(stripped_line)
                    and self._starts_numbered_block(lines, i + 1)):
                i = self.process_bibliography(lines, i + 1)
            # Headings, optionally with automatic numbering
            elif heading_match:
                level = len(heading_match.group(1))
                title = heading_match.group(2)
                # Level-2 headings start on a new page.
                if level == 2:
                    self.doc.add_page_break()
                heading = self.doc.add_paragraph()
                heading.style = f'Heading {level}'
                full_title = self.generate_heading_number(level) + title
                self.process_text_formatting(full_title, heading)
            # Fenced code blocks
            elif stripped_line.startswith('```'):
                i = self.process_code_block(lines, i)
            # Tables
            elif '|' in stripped_line:
                i = self.process_table(lines, i)
            # Lists
            elif re.match(r'^[-*+]\s', stripped_line) or re.match(r'^\d+\.\s', stripped_line):
                i = self.process_list(lines, i)
            # Block quotes
            elif stripped_line.startswith('>'):
                quote_text = re.sub(r'^>\s?', '', stripped_line)
                quote_paragraph = self.doc.add_paragraph()
                quote_paragraph.paragraph_format.left_indent = Inches(0.5)
                quote_paragraph.paragraph_format.right_indent = Inches(0.5)
                self.process_text_formatting(quote_text, quote_paragraph)
                for run in quote_paragraph.runs:
                    run.font.italic = True
            # Horizontal rules
            elif stripped_line in ['---', '***', '___']:
                hr_paragraph = self.doc.add_paragraph()
                hr_paragraph.add_run('_' * 50)
                hr_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
            # Ordinary paragraphs; this also covers lines such as "#tag"
            # that start with "#" but are not headings.
            else:
                paragraph = self.doc.add_paragraph()
                self.process_text_formatting(stripped_line, paragraph)
            i += 1
        # Footnotes go to the end of the document, after a separator line.
        if footnote_definitions:
            self.doc.add_paragraph().add_run('_' * 50)
            for footnote_num in sorted(footnote_definitions, key=int):
                self.add_footnote_definition(footnote_num, footnote_definitions[footnote_num])
        self.doc.save(output_path)
        return output_path
 def main():
    """Command-line entry point: convert one Markdown file to DOCX.

    Usage: ``python md_to_docx.py <markdown file> [output file]``. Without an
    output path the converter picks one itself. Errors are printed rather
    than raised.
    """
    if len(sys.argv) < 2:
        # The usage line names this script (it used to mention another file).
        print("Использование: python md_to_docx.py <путь_к_md_файлу> [путь_к_выходному_файлу]")
        return
    md_file = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) > 2 else None
    # Default settings are used as they are.
    settings = DocumentSettings()
    converter = MarkdownToDocxConverter(settings)
    try:
        output_path = converter.convert(md_file, output_file)
        print(f"Файл успешно конвертирован: {output_path}")
    except Exception as e:
        print(f"Ошибка конвертации: {e}")
 if __name__ == "__main__":
    main()
 # Usage example with custom GOST settings (kept as an unused string literal):
 """
 settings = DocumentSettings()
 settings.font_name = "Times New Roman"
 settings.font_size = 14
 settings.heading1_font_size = 16
 settings.heading2_font_size = 14
 settings.line_spacing = 1.5
 settings.margin_left = 3.0  # для переплета
 settings.auto_numbering_headings = True
 settings.numbering_format = "decimal"  # 1.1.1 формат
 settings.page_numbering = True
 settings.page_number_position = "bottom_center"
 converter = MarkdownToDocxConverter(settings)
 converter.convert("dissertation.md", "dissertation_gost.docx")
 """
--- a/rep_to_txt.py
+++ b/rep_to_txt.py
@ -0,0 +1,165 @@
 import os
 # Names of directories and files left out of the report. Entries are compared
 # with the full entry name, not with a suffix or a glob pattern.
 IGNORE_PATTERNS = {
    # version control systems
    '.git',
    '.svn',
    '.hg',
    # Python artifacts
    '__pycache__',
    '.pytest_cache',
    # Node.js dependencies
    'node_modules',
    '.npm',
    # build outputs
    'target',
    'build',
    'dist',
    # IDE metadata
    '.idea',
    '.vscode',
    # OS metadata
    '.DS_Store',
    'Thumbs.db',
    # Qt user config
    '.pro.user',
 }
 # File extensions (lower case, with the dot) whose content is never printed.
 BINARY_EXTENSIONS = {
    # executables, libraries and archives
    '.exe', '.dll', '.so', '.dylib',
    '.zip', '.tar', '.gz', '.rar', '.7z',
    # images
    '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.ico', '.svg', '.webp',
    # audio and video
    '.mp3', '.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv',
    # office documents
    '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
    # raw data and databases
    '.bin', '.dat', '.db', '.sqlite', '.mdb',
 }
 def scan_directory(path, prefix=""):
    """Return the tree lines for *path*: directories first, then files.

    Names are sorted, entries listed in IGNORE_PATTERNS are left out and
    sub-directories are expanded recursively with *prefix* extended by the
    matching guide characters. A directory that cannot be listed yields a
    single "[Access Denied]" line.
    """
    tree_lines = []
    try:
        names = sorted(os.listdir(path))
        visible = [name for name in names if name not in IGNORE_PATTERNS]
        folders = [name for name in visible if os.path.isdir(os.path.join(path, name))]
        plain_files = [name for name in visible if os.path.isfile(os.path.join(path, name))]
        ordered = folders + plain_files
        last_index = len(ordered) - 1
        for index, name in enumerate(ordered):
            # The last entry closes the branch; earlier ones keep it open.
            if index == last_index:
                branch, continuation = "└── ", "    "
            else:
                branch, continuation = "├── ", "│   "
            tree_lines.append(prefix + branch + name)
            child_path = os.path.join(path, name)
            if os.path.isdir(child_path):
                tree_lines.extend(scan_directory(child_path, prefix + continuation))
    except PermissionError:
        tree_lines.append(prefix + "└── [Access Denied]")
    return tree_lines
 def generate_complete_project_structure(root_path):
    """Build the full text report for the project at *root_path*.

    The report is the root name, the directory tree, a separator and then the
    content section of every file, all joined with newlines. A missing path
    produces an error string instead of a report.
    """
    if not os.path.exists(root_path):
        return f"Error: Path {root_path} does not exist"
    # A path ending with a separator has an empty basename; use the path itself.
    root_name = os.path.basename(root_path) or root_path
    report = [
        root_name,
        *scan_directory(root_path),  # part 1: the tree
        "\n",  # separator between the tree and the file contents
        *extract_all_file_contents(root_path),  # part 2: file contents
    ]
    return "\n".join(report)
 def extract_all_file_contents(root_path):
    """Return the report lines with the content of every file under *root_path*.

    Directories and files named in IGNORE_PATTERNS are skipped. Both
    directories and files are visited in sorted order so the report is the
    same on every run and platform.
    """
    content_lines = []
    for root, dirs, files in os.walk(root_path):
        # Assigning in place prunes the walk; sorting fixes the visiting order.
        dirs[:] = sorted(d for d in dirs if d not in IGNORE_PATTERNS)
        for file in sorted(files):
            if file in IGNORE_PATTERNS:
                continue
            file_path = os.path.join(root, file)
            relative_path = os.path.relpath(file_path, root_path)
            content_lines.extend(process_single_file(relative_path, file_path))
    return content_lines
 def process_single_file(relative_path, file_path):
    """Return the report section for one file.

    The section is a header (rule, relative path, rule), then either the
    numbered text lines, a placeholder URL for common image types or a
    binary-file notice, and finally an empty line.
    """
    rule = "-" * 80
    section = ["\n" + rule, f"{relative_path}:", rule]
    extension = os.path.splitext(relative_path)[1].lower()
    if extension not in BINARY_EXTENSIONS and not is_likely_binary(file_path):
        # Text file: include its content.
        section.extend(extract_text_content(file_path))
    elif extension in {'.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico'}:
        # Images get a raw-URL template; the "..." part is left to be filled in.
        section.append(
            f"https://raw.githubusercontent.com/.../{relative_path.replace(os.sep, '/')}"
        )
    else:
        section.append("[Binary file - content not displayed]")
    section.append("")
    return section
 def extract_text_content(file_path):
    """Return the numbered lines of a text file, trying several encodings.

    Each line is formatted as ``"   1 | text"`` with trailing whitespace
    removed. Any error other than a decoding failure is reported as a single
    "ERROR: ..." line.
    """
    # 'utf-8-sig' reads plain UTF-8 too and drops a byte-order mark, so it
    # goes first; 'latin1' accepts every byte and is therefore the last resort.
    encodings_priority = ['utf-8-sig', 'cp1251', 'latin1']
    for encoding in encodings_priority:
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                return [f"{i:4} | {line.rstrip()}" for i, line in enumerate(f, 1)]
        except UnicodeError:
            continue
        except Exception as e:
            return [f"ERROR: Не удается прочитать файл - {e}"]
    # Kept as a safeguard in case the list above loses its catch-all entry.
    return ["WARNING: Кодировка файла, не поддерживаемая для извлечения текста"]
 def is_likely_binary(file_path):
    """Guess whether a file is binary by looking for a NUL byte.

    Only the first 8192 bytes are inspected. A file that cannot be opened or
    read is reported as binary so that its content is not printed.
    """
    try:
        with open(file_path, 'rb') as f:
            chunk = f.read(8192)
    except OSError:
        # Only I/O failures are expected here; anything else should surface.
        return True
    return b'\x00' in chunk
 if __name__ == "__main__":
    import sys  # local import: only the command-line entry point needs it

    # The project directory can be passed as the first command-line argument;
    # the previously hard-coded path remains the default.
    default_project_path = r"D:\Programs\GitHub\deev.space\static"
    project_path = sys.argv[1] if len(sys.argv) > 1 else default_project_path
    print("Приступаем к формированию комплексной структуры проекта...")
    if not os.path.exists(project_path):
        # Stop here instead of saving the error text as if it were a report.
        print(f"Error: Path {project_path} does not exist")
        sys.exit(1)
    tree_output = generate_complete_project_structure(project_path)
    # Name the report after the last path component, accepting both "\" and
    # "/" separators as well as relative paths such as ".".
    normalized = os.path.abspath(project_path).replace('\\', '/').rstrip('/')
    output_filename = (normalized.split('/')[-1] or "project") + "_rep.txt"
    try:
        with open(output_filename, "w", encoding="utf-8") as f:
            f.write(tree_output)
    except OSError as e:
        print(f"Предупреждение: Не удалось сохранить файл - {e}")
        sys.exit(1)
    # Success is reported only after the file has really been written.
    print(f"\nПолная проектная документация, сохраненная в: {output_filename}")
    print("Формирование структуры проекта успешно завершено!")