Mirror of https://github.com/fccapria/scientify.git (synced 2026-01-12 02:36:10 +00:00)

Initial release
Commit ae5e4b8873
52 changed files with 17572 additions and 0 deletions
8  backend/.env  Normal file
@@ -0,0 +1,8 @@
POSTGRES_USER=scientify_user
POSTGRES_PASSWORD=scientify_pass
POSTGRES_DB=scientify_db
POSTGRES_HOST=db
POSTGRES_PORT=5432

# Database URL for local development with Docker
DATABASE_URL=postgresql+asyncpg://scientify_user:scientify_pass@db:5432/scientify_db
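For local runs outside Docker, a minimal sketch of how these variables could be loaded (python-dotenv is already in requirements.txt; the .env path and working directory are assumptions):

# sketch: load backend/.env and fall back to composing the async URL by hand
import os
from dotenv import load_dotenv

load_dotenv(".env")  # assumes the working directory is backend/
db_url = os.getenv(
    "DATABASE_URL",
    f"postgresql+asyncpg://{os.getenv('POSTGRES_USER')}:{os.getenv('POSTGRES_PASSWORD')}"
    f"@{os.getenv('POSTGRES_HOST')}:{os.getenv('POSTGRES_PORT')}/{os.getenv('POSTGRES_DB')}",
)
print(db_url)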
25  backend/Dockerfile  Normal file
@@ -0,0 +1,25 @@
FROM python:3.11-slim

RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    libpq-dev \
    libffi-dev \
    libxml2-dev \
    libxslt1-dev \
    libcairo2-dev \
    libpango1.0-dev \
    libgdk-pixbuf2.0-dev \
    libgtk-3-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 8000

CMD ["python", "main.py"]
82  backend/app/app.py  Normal file
@@ -0,0 +1,82 @@
from contextlib import asynccontextmanager

from fastapi import Depends, FastAPI
from fastapi.middleware.cors import CORSMiddleware

from app.db import User, create_db_and_tables
from app.schemas import UserCreate, UserRead, UserUpdate
from app.users import auth_backend, current_active_user, fastapi_users
from app.upload import router as upload_router
from app.download import router as download_router
from app.publication_routes import router as publication_router
from app.debug_routes import router as debug_router


@asynccontextmanager
async def lifespan(app: FastAPI):
    await create_db_and_tables()
    yield


app = FastAPI(
    title="Scientify API",
    description="API for managing scientific publications",
    version="1.0.0",
    lifespan=lifespan
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://frontend:80"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers for different parts of the application
app.include_router(upload_router)
app.include_router(download_router)
app.include_router(publication_router)
app.include_router(debug_router)

# Authentication and user management routes
app.include_router(
    fastapi_users.get_auth_router(auth_backend), prefix="/auth/jwt", tags=["auth"]
)
app.include_router(
    fastapi_users.get_register_router(UserRead, UserCreate),
    prefix="/auth",
    tags=["auth"],
)
app.include_router(
    fastapi_users.get_reset_password_router(),
    prefix="/auth",
    tags=["auth"],
)
app.include_router(
    fastapi_users.get_verify_router(UserRead),
    prefix="/auth",
    tags=["auth"],
)
app.include_router(
    fastapi_users.get_users_router(UserRead, UserUpdate),
    prefix="/users",
    tags=["users"],
)


@app.get("/authenticated-route")
async def authenticated_route(user: User = Depends(current_active_user)):
    return {"message": f"Hello {user.email}!"}


@app.get("/")
async def root():
    """
    Root endpoint for the Scientify API
    """
    return {
        "message": "Welcome to Scientify API",
        "description": "The intelligent platform to manage your scientific publications",
        "documentation": "/docs"
    }
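A minimal sketch of exercising these routes with httpx. The base URL and credentials are assumptions; fastapi-users' /auth/jwt/login expects form-encoded username/password and returns a bearer token:

# sketch: register, log in, and call the protected route
import httpx

BASE = "http://localhost:8000"  # assumption: uvicorn default port

with httpx.Client(base_url=BASE) as client:
    client.post("/auth/register", json={"email": "me@example.org", "password": "s3cret"})
    token = client.post(
        "/auth/jwt/login",
        data={"username": "me@example.org", "password": "s3cret"},
    ).json()["access_token"]
    r = client.get("/authenticated-route", headers={"Authorization": f"Bearer {token}"})
    print(r.json())  # expected: {"message": "Hello me@example.org!"}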
100  backend/app/db.py  Normal file
@@ -0,0 +1,100 @@
import os
from collections.abc import AsyncGenerator

from fastapi import Depends
from fastapi_users.db import SQLAlchemyBaseUserTableUUID, SQLAlchemyUserDatabase

from sqlalchemy import Column, Integer, String, Table, ForeignKey, LargeBinary, DateTime
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.orm import DeclarativeBase, relationship, sessionmaker
from sqlalchemy.dialects.postgresql import UUID
import uuid
import datetime

DATABASE_URL = os.getenv("DATABASE_URL", "postgresql+asyncpg://scientify_user:scientify_pass@db:5432/scientify_db")

if "+asyncpg" not in DATABASE_URL:
    raise ValueError("DATABASE_URL must use the asyncpg driver for async operations. Use postgresql+asyncpg://...")


class Base(DeclarativeBase):
    pass


class User(SQLAlchemyBaseUserTableUUID, Base):
    first_name = Column(String, nullable=True)
    last_name = Column(String, nullable=True)

    publications = relationship("Publication", back_populates="user")


try:
    engine = create_async_engine(DATABASE_URL, echo=True)
    print("Database engine created successfully")
except Exception as e:
    print(f"Error creating database engine: {e}")
    raise

async_session_maker = sessionmaker(
    engine, class_=AsyncSession, expire_on_commit=False
)


async def get_db():
    async with async_session_maker() as session:
        yield session


async def create_db_and_tables():
    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)


async def get_async_session():
    async with async_session_maker() as session:
        yield session


async def get_user_db(session: AsyncSession = Depends(get_async_session)):
    yield SQLAlchemyUserDatabase(session, User)


# Association tables for the many-to-many relations
publication_authors = Table(
    'publication_authors', Base.metadata,
    Column('publication_id', Integer, ForeignKey('publications.id', ondelete='CASCADE')),
    Column('author_id', Integer, ForeignKey('authors.id', ondelete='CASCADE'))
)

publication_keywords = Table(
    'publication_keywords', Base.metadata,
    Column('publication_id', Integer, ForeignKey('publications.id', ondelete='CASCADE')),
    Column('keyword_id', Integer, ForeignKey('keywords.id', ondelete='CASCADE'))
)


class Author(Base):
    __tablename__ = 'authors'
    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False)


class Keyword(Base):
    __tablename__ = 'keywords'
    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False)


class Publication(Base):
    __tablename__ = 'publications'
    id = Column(Integer, primary_key=True)
    title = Column(String, nullable=False)
    file = Column(LargeBinary, nullable=False)
    filename = Column(String)
    upload_date = Column(DateTime, default=datetime.datetime.utcnow)
    journal = Column(String, nullable=True)
    year = Column(Integer, nullable=True)
    doi = Column(String, nullable=True, unique=True)

    user_id = Column(UUID(as_uuid=True), ForeignKey('user.id'), nullable=False)
    user = relationship("User", back_populates="publications")
    authors = relationship('Author', secondary=publication_authors, backref='publications')
    keywords = relationship('Keyword', secondary=publication_keywords, backref='publications')
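A minimal sketch of using these models from a standalone script, assuming the Postgres instance from docker-compose.yml is reachable with the configured DATABASE_URL:

# sketch: query publications with their authors and keywords eagerly loaded
import asyncio
from sqlalchemy import select
from sqlalchemy.orm import selectinload
from app.db import Publication, async_session_maker

async def list_titles():
    async with async_session_maker() as session:
        stmt = select(Publication).options(
            selectinload(Publication.authors),
            selectinload(Publication.keywords),
        )
        for pub in (await session.execute(stmt)).scalars():
            print(pub.title, [a.name for a in pub.authors])

asyncio.run(list_titles())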
59  backend/app/debug_routes.py  Normal file
@@ -0,0 +1,59 @@
from fastapi import Depends, APIRouter
from sqlalchemy import select
from sqlalchemy.orm import selectinload
from sqlalchemy.ext.asyncio import AsyncSession

from app.db import Publication, get_db, Keyword, Author

router = APIRouter(prefix="/debug", tags=["debug"])


# Debug endpoint to view all publications with complete data
@router.get("/publications")
async def debug_publications(db: AsyncSession = Depends(get_db)):
    """Debug endpoint to view all publications with their complete data"""
    stmt = select(Publication).options(
        selectinload(Publication.authors),
        selectinload(Publication.keywords),
        selectinload(Publication.user)
    ).order_by(Publication.upload_date.desc())

    result = await db.execute(stmt)
    publications = result.scalars().all()

    debug_data = []
    for pub in publications:
        debug_data.append({
            "id": pub.id,
            "title": pub.title,
            "authors": [{"id": a.id, "name": a.name} for a in pub.authors],
            "keywords": [{"id": k.id, "name": k.name} for k in pub.keywords],
            "upload_date": pub.upload_date,
            "journal": pub.journal,
            "year": pub.year,
            "doi": pub.doi,
            "user_email": pub.user.email if pub.user else None,
            "user_id": str(pub.user_id) if pub.user_id else None
        })

    return {
        "total_publications": len(publications),
        "publications": debug_data
    }


# Debug endpoint to view all authors
@router.get("/authors")
async def debug_authors(db: AsyncSession = Depends(get_db)):
    """Debug endpoint to view all authors"""
    result = await db.execute(select(Author))
    authors = result.scalars().all()
    return [{"id": a.id, "name": a.name} for a in authors]


# Debug endpoint to view all keywords
@router.get("/keywords")
async def debug_keywords(db: AsyncSession = Depends(get_db)):
    """Debug endpoint to view all keywords"""
    result = await db.execute(select(Keyword))
    keywords = result.scalars().all()
    return [{"id": k.id, "name": k.name} for k in keywords]
26  backend/app/download.py  Normal file
@@ -0,0 +1,26 @@
from fastapi import APIRouter, HTTPException, Depends
from fastapi.responses import StreamingResponse

from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select

from app.db import Publication, get_db

import io

router = APIRouter()

@router.get("/download/{publication_id}")
async def download_publication(publication_id: int, db: AsyncSession = Depends(get_db)):
    result = await db.execute(select(Publication).where(Publication.id == publication_id))
    publication = result.scalar_one_or_none()
    if not publication:
        raise HTTPException(status_code=404, detail="Publication not found")
    file_bytes = publication.file
    filename = publication.filename or "document.pdf"

    return StreamingResponse(
        io.BytesIO(file_bytes),
        media_type="application/pdf",
        headers={"Content-Disposition": f"attachment; filename={filename}"}
    )
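A minimal client-side sketch of saving a stored PDF to disk; the base URL and publication id are assumptions:

# sketch: download publication 1 and write it to a local file
import httpx

resp = httpx.get("http://localhost:8000/download/1")
resp.raise_for_status()
with open("publication_1.pdf", "wb") as fh:
    fh.write(resp.content)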
362  backend/app/file_converter.py  Normal file
@@ -0,0 +1,362 @@
import os
import tempfile
from pathlib import Path
from typing import Tuple, Optional
from io import BytesIO
import logging
import re

from docx import Document as DocxDocument
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter, A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.lib.units import inch
from weasyprint import HTML, CSS
import mammoth

logger = logging.getLogger(__name__)


class FileConverter:
    @staticmethod
    def get_file_extension(filename: str) -> str:
        return Path(filename).suffix.lower()

    @staticmethod
    def convert_docx_to_pdf_reportlab(docx_content: bytes, original_filename: str) -> Tuple[bytes, str]:
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                # Save the DOCX to a temporary file
                docx_path = os.path.join(temp_dir, "temp.docx")
                with open(docx_path, "wb") as f:
                    f.write(docx_content)

                # Read the DOCX
                doc = DocxDocument(docx_path)

                # Create the PDF
                pdf_path = os.path.join(temp_dir, "output.pdf")
                FileConverter._create_pdf_from_docx(doc, pdf_path)

                # Read the PDF back
                with open(pdf_path, "rb") as f:
                    pdf_content = f.read()

                # Build the new filename
                new_filename = original_filename.replace('.docx', '.pdf')

                return pdf_content, new_filename

        except Exception as e:
            return FileConverter.convert_docx_to_pdf_mammoth(docx_content, original_filename)

    @staticmethod
    def convert_docx_to_pdf_mammoth(docx_content: bytes, original_filename: str) -> Tuple[bytes, str]:
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                # Save the DOCX to a temporary file
                docx_path = os.path.join(temp_dir, "temp.docx")
                with open(docx_path, "wb") as f:
                    f.write(docx_content)

                # Convert to HTML
                with open(docx_path, "rb") as docx_file:
                    result = mammoth.convert_to_html(docx_file)
                    html_content = result.value

                # Wrap the HTML with print styles
                full_html = FileConverter._wrap_html_with_styles(html_content, "DOCX Document")

                # Convert to PDF
                pdf_bytes = FileConverter._html_to_pdf(full_html)

                new_filename = original_filename.replace('.docx', '.pdf')

                return pdf_bytes, new_filename

        except Exception as e:
            raise Exception(f"Impossible to convert from DOCX to PDF: {str(e)}")

    @staticmethod
    def _create_pdf_from_docx(docx_doc, output_path: str):
        doc = SimpleDocTemplate(output_path, pagesize=A4)
        styles = getSampleStyleSheet()
        story = []

        # Custom styles
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontSize=16,
            spaceAfter=12,
            textColor='black'
        )

        normal_style = ParagraphStyle(
            'CustomNormal',
            parent=styles['Normal'],
            fontSize=11,
            spaceAfter=6,
            textColor='black'
        )

        for paragraph in docx_doc.paragraphs:
            if paragraph.text.strip():
                if len(paragraph.text) < 100 and paragraph.text.isupper():
                    style = title_style
                elif paragraph.runs and paragraph.runs[0].bold:
                    style = title_style
                else:
                    style = normal_style

                p = Paragraph(paragraph.text, style)
                story.append(p)
                story.append(Spacer(1, 6))

        if not story:
            story.append(Paragraph("DOCX converted", normal_style))

        # Build the PDF
        doc.build(story)

    @staticmethod
    def convert_latex_to_pdf(latex_content: bytes, original_filename: str) -> Tuple[bytes, str]:
        try:
            # Decode the LaTeX source
            latex_text = latex_content.decode('utf-8', errors='ignore')

            # Convert to HTML
            html_content = FileConverter._latex_to_html_advanced(latex_text)

            # Convert to PDF
            pdf_bytes = FileConverter._html_to_pdf(html_content)

            # Build the new filename
            new_filename = original_filename.replace('.tex', '.pdf').replace('.latex', '.pdf')

            return pdf_bytes, new_filename

        except Exception as e:
            raise Exception(f"Impossible to convert from LaTeX to PDF: {str(e)}")

    @staticmethod
    def _latex_to_html_advanced(latex_text: str) -> str:
        html = latex_text

        # Strip preamble and document environment
        html = re.sub(r'\\documentclass(?:\[[^\]]*\])?\{[^}]*\}', '', html)
        html = re.sub(r'\\usepackage(?:\[[^\]]*\])?\{[^}]*\}', '', html)
        html = re.sub(r'\\begin\{document\}', '', html)
        html = re.sub(r'\\end\{document\}', '', html)
        html = re.sub(r'\\maketitle', '', html)

        # Title block
        html = re.sub(r'\\title\{([^}]*)\}', r'<h1 class="title">\1</h1>', html)
        html = re.sub(r'\\author\{([^}]*)\}', r'<p class="author"><strong>Author:</strong> \1</p>', html)
        html = re.sub(r'\\date\{([^}]*)\}', r'<p class="date"><strong>Date:</strong> \1</p>', html)

        # Sectioning
        html = re.sub(r'\\section\*?\{([^}]*)\}', r'<h2>\1</h2>', html)
        html = re.sub(r'\\subsection\*?\{([^}]*)\}', r'<h3>\1</h3>', html)
        html = re.sub(r'\\subsubsection\*?\{([^}]*)\}', r'<h4>\1</h4>', html)
        html = re.sub(r'\\paragraph\{([^}]*)\}', r'<h5>\1</h5>', html)

        # Inline formatting
        html = re.sub(r'\\textbf\{([^}]*)\}', r'<strong>\1</strong>', html)
        html = re.sub(r'\\textit\{([^}]*)\}', r'<em>\1</em>', html)
        html = re.sub(r'\\emph\{([^}]*)\}', r'<em>\1</em>', html)
        html = re.sub(r'\\underline\{([^}]*)\}', r'<u>\1</u>', html)
        html = re.sub(r'\\texttt\{([^}]*)\}', r'<code>\1</code>', html)

        # Math
        html = re.sub(r'\$\$([^$]+)\$\$', r'<div class="math-block">\1</div>', html)
        html = re.sub(r'\$([^$]+)\$', r'<span class="math-inline">\1</span>', html)

        # Lists
        html = re.sub(r'\\begin\{itemize\}', '<ul>', html)
        html = re.sub(r'\\end\{itemize\}', '</ul>', html)
        html = re.sub(r'\\begin\{enumerate\}', '<ol>', html)
        html = re.sub(r'\\end\{enumerate\}', '</ol>', html)
        html = re.sub(r'\\item(?:\[[^\]]*\])?\s*', '<li>', html)

        # Quotes
        html = re.sub(r'\\begin\{quote\}', '<blockquote>', html)
        html = re.sub(r'\\end\{quote\}', '</blockquote>', html)

        # Figures and tables become placeholders
        html = re.sub(r'\\begin\{figure\}.*?\\end\{figure\}', '<div class="figure">[Figure]</div>', html,
                      flags=re.DOTALL)

        html = re.sub(r'\\begin\{table\}.*?\\end\{table\}', '<div class="table">[Table]</div>', html, flags=re.DOTALL)

        # Drop any remaining LaTeX commands
        html = re.sub(r'\\[a-zA-Z]+(?:\[[^\]]*\])?\{[^}]*\}', '', html)
        html = re.sub(r'\\[a-zA-Z]+', '', html)

        # Line breaks
        html = re.sub(r'\\\\', '<br>', html)

        # Blank lines become paragraph breaks
        html = re.sub(r'\n\s*\n', '</p><p>', html)

        # Collapse whitespace
        html = re.sub(r'\s+', ' ', html)
        html = html.strip()

        return FileConverter._wrap_html_with_styles(html, "LaTeX Document")

    @staticmethod
    def _wrap_html_with_styles(content: str, title: str) -> str:
        html_template = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <meta charset="utf-8">
            <title>{title}</title>
            <style>
                @page {{
                    size: A4;
                    margin: 2cm;
                }}
                body {{
                    font-family: 'Times New Roman', serif;
                    font-size: 12pt;
                    line-height: 1.6;
                    text-align: justify;
                    color: #000;
                }}
                .title {{
                    font-size: 20pt;
                    font-weight: bold;
                    text-align: center;
                    margin-bottom: 16pt;
                }}
                .author, .date {{
                    text-align: center;
                    margin-bottom: 12pt;
                    font-style: italic;
                }}
                h1, h2 {{
                    font-size: 16pt;
                    font-weight: bold;
                    margin-top: 20pt;
                    margin-bottom: 12pt;
                }}
                h3 {{
                    font-size: 14pt;
                    font-weight: bold;
                    margin-top: 16pt;
                    margin-bottom: 10pt;
                }}
                h4, h5 {{
                    font-size: 12pt;
                    font-weight: bold;
                    margin-top: 12pt;
                    margin-bottom: 8pt;
                }}
                p {{
                    margin-bottom: 12pt;
                    text-indent: 0;
                }}
                ul, ol {{
                    margin-bottom: 12pt;
                    padding-left: 30pt;
                }}
                li {{
                    margin-bottom: 6pt;
                }}
                blockquote {{
                    margin: 12pt 20pt;
                    padding: 8pt;
                    border-left: 3pt solid #ccc;
                    font-style: italic;
                }}
                code {{
                    font-family: 'Courier New', monospace;
                    background-color: #f5f5f5;
                    padding: 2pt;
                }}
                .math-block {{
                    text-align: center;
                    margin: 12pt 0;
                    font-family: 'Times New Roman', serif;
                }}
                .math-inline {{
                    font-family: 'Times New Roman', serif;
                }}
                .figure, .table {{
                    text-align: center;
                    margin: 20pt 0;
                    padding: 10pt;
                    border: 1pt solid #ccc;
                    background-color: #f9f9f9;
                }}
                strong {{ font-weight: bold; }}
                em {{ font-style: italic; }}
                u {{ text-decoration: underline; }}
            </style>
        </head>
        <body>
            <div>{content}</div>
        </body>
        </html>
        """

        return html_template

    @staticmethod
    def _html_to_pdf(html_content: str) -> bytes:
        try:
            # Render the HTML with WeasyPrint
            html_doc = HTML(string=html_content)
            pdf_bytes = html_doc.write_pdf()

            return pdf_bytes

        except Exception as e:
            raise Exception(f"Impossible to convert from HTML to PDF: {str(e)}")

    @staticmethod
    def convert_to_pdf_if_needed(file_content: bytes, filename: str) -> Tuple[bytes, str]:
        extension = FileConverter.get_file_extension(filename)

        if extension == '.pdf':
            return file_content, filename
        elif extension == '.docx':
            return FileConverter.convert_docx_to_pdf_mammoth(file_content, filename)
        elif extension in ['.tex', '.latex']:
            return FileConverter.convert_latex_to_pdf(file_content, filename)
        else:
            raise Exception(f"Format not supported: {extension}")


class AdvancedDocxConverter:
    @staticmethod
    def convert_docx_with_pandoc(docx_content: bytes, original_filename: str) -> Tuple[bytes, str]:
        try:
            import pypandoc

            with tempfile.TemporaryDirectory() as temp_dir:
                # Save the DOCX to a temporary file
                docx_path = os.path.join(temp_dir, "temp.docx")
                with open(docx_path, "wb") as f:
                    f.write(docx_content)

                # Convert to HTML with pandoc
                html_content = pypandoc.convert_file(docx_path, 'html')

                full_html = FileConverter._wrap_html_with_styles(html_content, "DOCX Document")

                # Convert the HTML to PDF
                pdf_bytes = FileConverter._html_to_pdf(full_html)

                new_filename = original_filename.replace('.docx', '.pdf')

                return pdf_bytes, new_filename

        except ImportError:
            logger.warning("pypandoc not found for DOCX")
            return FileConverter.convert_docx_to_pdf_mammoth(docx_content, original_filename)
        except Exception as e:
            logger.warning(f"pandoc error in DOCX: {e}, falling back to the standard converter")
            return FileConverter.convert_docx_to_pdf_mammoth(docx_content, original_filename)
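A minimal sketch of how the converter is meant to be called outside the upload route; the sample file path is an assumption:

# sketch: convert an uploaded document to PDF bytes before storing it
from app.file_converter import FileConverter

with open("paper.docx", "rb") as fh:  # hypothetical input file
    pdf_bytes, pdf_name = FileConverter.convert_to_pdf_if_needed(fh.read(), "paper.docx")

with open(pdf_name, "wb") as out:
    out.write(pdf_bytes)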
233  backend/app/publication_routes.py  Normal file
@@ -0,0 +1,233 @@
from fastapi import Depends, APIRouter, Query, HTTPException
from sqlalchemy import select, or_, and_, asc, desc
from sqlalchemy.orm import selectinload
from sqlalchemy.ext.asyncio import AsyncSession
from typing import List, Optional

from app.db import Publication, get_db, Keyword, Author, User
from app.schemas import PublicationOut, UserPublicationOut
from app.users import current_active_user

# Create router for publication endpoints
router = APIRouter()


# Endpoint to delete a publication
@router.delete("/publications/{publication_id}")
async def delete_publication(
    publication_id: int,
    user: User = Depends(current_active_user),
    db: AsyncSession = Depends(get_db)
):
    """
    Delete a publication owned by the current user
    """
    # Find the publication with its relations
    result = await db.execute(
        select(Publication).options(
            selectinload(Publication.authors),
            selectinload(Publication.keywords)
        ).where(
            and_(
                Publication.id == publication_id,
                Publication.user_id == user.id  # Security: only the user's own publications
            )
        )
    )
    publication = result.scalar_one_or_none()

    if not publication:
        raise HTTPException(
            status_code=404,
            detail="Publication not found or you don't have permission to delete it"
        )

    publication_title = publication.title

    # Delete the publication (many-to-many relations are deleted automatically)
    await db.delete(publication)
    await db.commit()

    print(f"🗑️ Publication deleted: '{publication_title}' (ID: {publication_id}) by user {user.email}")

    return {"message": f"Publication '{publication_title}' successfully deleted"}


# Endpoint for user publications with sorting
@router.get("/users/me/publications", response_model=List[UserPublicationOut])
async def get_user_publications(
    order_by: Optional[str] = Query("date_desc",
                                    description="Sort by: date_asc, date_desc, title_asc, title_desc"),
    user: User = Depends(current_active_user),
    db: AsyncSession = Depends(get_db)
):
    """
    Returns all publications uploaded by the current user, with sorting
    """
    stmt = select(Publication).options(
        selectinload(Publication.authors),
        selectinload(Publication.keywords)
    ).where(
        Publication.user_id == user.id
    )

    # Sorting management
    if order_by == "date_asc":
        stmt = stmt.order_by(asc(Publication.upload_date))
    elif order_by == "date_desc":
        stmt = stmt.order_by(desc(Publication.upload_date))
    elif order_by == "title_asc":
        stmt = stmt.order_by(asc(Publication.title))
    elif order_by == "title_desc":
        stmt = stmt.order_by(desc(Publication.title))
    else:
        # Default: descending by date (most recent first)
        stmt = stmt.order_by(desc(Publication.upload_date))

    result = await db.execute(stmt)
    publications = result.scalars().all()

    print(f"🔍 User {user.email} (ID: {user.id}) has {len(publications)} publications (sorted by: {order_by})")

    return publications


# Search publications endpoint
@router.get("/publications", response_model=List[PublicationOut])
async def get_publications(
    search: Optional[str] = Query(None,
                                  description="Search by title, author or keyword. For multiple keywords use spaces: 'keyword1 keyword2'"),
    order_by: Optional[str] = Query("date_desc",
                                    description="Sort by: date_asc, date_desc, title_asc, title_desc"),
    db: AsyncSession = Depends(get_db)
):
    """
    Advanced keyword-based search.

    The search runs in priority order:
    1. Keywords (highest priority) - supports multiple terms separated by spaces
    2. Authors (medium priority)
    3. Title (lowest priority)
    """

    print(f"🔍 Search: '{search}' | Sort by: {order_by}")

    # If there is no search query, return everything, sorted
    if search is None or not search.strip():
        stmt = select(Publication).options(
            selectinload(Publication.authors),
            selectinload(Publication.keywords)
        )

        # Sorting management
        if order_by == "date_asc":
            stmt = stmt.order_by(asc(Publication.upload_date))
        elif order_by == "date_desc":
            stmt = stmt.order_by(desc(Publication.upload_date))
        elif order_by == "title_asc":
            stmt = stmt.order_by(asc(Publication.title))
        elif order_by == "title_desc":
            stmt = stmt.order_by(desc(Publication.title))
        else:
            # Default: descending by date
            stmt = stmt.order_by(desc(Publication.upload_date))

        result = await db.execute(stmt)
        return result.scalars().all()

    search_term = search.strip()

    # Split the search string into individual keywords
    search_keywords = [kw.strip().lower() for kw in search_term.split() if kw.strip()]
    print(f"🔍 Keywords to search: {search_keywords}")

    # Set to track IDs that have already been found
    found_publication_ids = set()
    final_results = []

    # 1. Search by keywords (highest priority) - supports multiple terms
    if search_keywords:
        print("🔍 Step 1: Searching by multiple keywords...")

        # Create a condition for each keyword
        keyword_conditions = []
        for keyword in search_keywords:
            keyword_pattern = f"%{keyword}%"
            keyword_conditions.append(
                Publication.keywords.any(Keyword.name.ilike(keyword_pattern))
            )

        # The publication must match ALL keywords (AND)
        keyword_query = select(Publication).options(
            selectinload(Publication.authors),
            selectinload(Publication.keywords)
        ).where(
            and_(*keyword_conditions)  # All conditions must be true
        )

        keyword_result = await db.execute(keyword_query)
        keyword_publications = keyword_result.scalars().all()

        for pub in keyword_publications:
            if pub.id not in found_publication_ids:
                final_results.append(pub)
                found_publication_ids.add(pub.id)
                pub_keywords = [k.name for k in pub.keywords]
                print(f" ✅ Found by keywords: {pub.title} (keywords: {pub_keywords})")

    # 2. Search by authors (medium priority) - uses the complete search string
    print("🔍 Step 2: Searching by authors...")
    author_pattern = f"%{search_term}%"
    author_query = select(Publication).options(
        selectinload(Publication.authors),
        selectinload(Publication.keywords)
    ).join(Publication.authors).where(
        Author.name.ilike(author_pattern)
    )

    author_result = await db.execute(author_query)
    author_publications = author_result.scalars().all()

    for pub in author_publications:
        if pub.id not in found_publication_ids:
            final_results.append(pub)
            found_publication_ids.add(pub.id)
            pub_authors = [a.name for a in pub.authors]
            print(f" ✅ Found by author: {pub.title} (authors: {pub_authors})")

    # 3. Search by title (lowest priority) - uses the complete search string
    print("🔍 Step 3: Searching by title...")
    title_pattern = f"%{search_term}%"
    title_query = select(Publication).options(
        selectinload(Publication.authors),
        selectinload(Publication.keywords)
    ).where(
        Publication.title.ilike(title_pattern)
    )

    title_result = await db.execute(title_query)
    title_publications = title_result.scalars().all()

    for pub in title_publications:
        if pub.id not in found_publication_ids:
            final_results.append(pub)
            found_publication_ids.add(pub.id)
            print(f" ✅ Found by title: {pub.title}")

    # Apply sorting to the final results
    print(f"🔍 Applying sorting: {order_by}")
    if order_by == "date_asc":
        final_results.sort(key=lambda x: x.upload_date)
    elif order_by == "date_desc":
        final_results.sort(key=lambda x: x.upload_date, reverse=True)
    elif order_by == "title_asc":
        final_results.sort(key=lambda x: x.title.lower())
    elif order_by == "title_desc":
        final_results.sort(key=lambda x: x.title.lower(), reverse=True)
    else:
        # Default: descending by date
        final_results.sort(key=lambda x: x.upload_date, reverse=True)

    print(f"🔍 Total results found: {len(final_results)}")
    return final_results
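A minimal sketch of querying the search endpoint; the base URL and search terms are assumptions:

# sketch: search for publications matching two keywords, newest first
import httpx

resp = httpx.get(
    "http://localhost:8000/publications",
    params={"search": "graphene spectroscopy", "order_by": "date_desc"},
)
for pub in resp.json():
    print(pub["title"], [k["name"] for k in pub["keywords"]])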
69  backend/app/schemas.py  Normal file
@@ -0,0 +1,69 @@
import uuid

from pydantic import BaseModel
from typing import List, Optional
from datetime import datetime

from fastapi_users import schemas


class UserRead(schemas.BaseUser[uuid.UUID]):
    first_name: Optional[str] = None
    last_name: Optional[str] = None


class UserCreate(schemas.BaseUserCreate):
    first_name: Optional[str] = None
    last_name: Optional[str] = None


class UserUpdate(schemas.BaseUserUpdate):
    first_name: Optional[str] = None
    last_name: Optional[str] = None


class AuthorOut(BaseModel):
    id: int
    name: str

    class Config:
        orm_mode = True


class KeywordOut(BaseModel):
    id: int
    name: str

    class Config:
        orm_mode = True


class PublicationOut(BaseModel):
    id: int
    title: str
    filename: Optional[str]
    upload_date: datetime
    journal: Optional[str] = None
    year: Optional[int] = None
    doi: Optional[str] = None
    authors: List[AuthorOut]
    keywords: List[KeywordOut]
    user_id: Optional[uuid.UUID] = None

    class Config:
        orm_mode = True


class UserPublicationOut(BaseModel):
    id: int
    title: str
    filename: Optional[str]
    upload_date: datetime
    journal: Optional[str] = None
    year: Optional[int] = None
    doi: Optional[str] = None
    authors: List[AuthorOut]
    keywords: List[KeywordOut]

    class Config:
        orm_mode = True
253  backend/app/upload.py  Normal file
@@ -0,0 +1,253 @@
from fastapi import APIRouter, UploadFile, File, Form, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy.orm import joinedload

from app.db import Publication, Author, Keyword, User, get_db
from app.utils import parser, nlp
from app.users import current_active_user
from app.file_converter import FileConverter, AdvancedDocxConverter

from typing import Optional
import logging

logger = logging.getLogger(__name__)

router = APIRouter()


@router.post("/upload/")
async def upload_publication(
    file: UploadFile = File(...),
    bibtex: Optional[UploadFile] = File(None),
    title: Optional[str] = Form(None),
    authors: Optional[str] = Form(None),
    year: Optional[int] = Form(None),
    journal: Optional[str] = Form(None),
    doi: Optional[str] = Form(None),
    db: AsyncSession = Depends(get_db),
    user: User = Depends(current_active_user)
):
    try:
        bibtex_metadata = None

        if bibtex is not None:
            try:
                bibtex_content = (await bibtex.read()).decode("utf-8")
                b_title, b_authors, b_year, b_journal, b_doi = parser.bibtex(bibtex_content)
                bibtex_metadata = {
                    "title": b_title,
                    "authors": b_authors,
                    "year": b_year,
                    "journal": b_journal,
                    "doi": b_doi
                }

                # Form fields take precedence; BibTeX fills in whatever is missing
                title = title or b_title
                authors = authors or b_authors
                year = year or b_year
                journal = journal or b_journal
                doi = doi or b_doi

                logger.info(f"BibTeX processed. Metadata extracted: {bibtex_metadata}")

            except Exception as e:
                logger.error(f"BibTeX parsing error: {e}")
                raise HTTPException(
                    status_code=400,
                    detail=f"BibTeX parsing error: {str(e)}"
                )

        if doi and not is_valid_doi(doi):
            raise HTTPException(
                status_code=400,
                detail="Invalid DOI. Use this format: 10.xxxx/xxxxx"
            )

        if doi:
            existing_doi = await db.execute(
                select(Publication).where(Publication.doi == doi)
            )
            if existing_doi.scalar_one_or_none():
                raise HTTPException(
                    status_code=400,
                    detail="DOI already exists"
                )
        if bibtex is None:
            missing_fields = []
            if not title: missing_fields.append("title")
            if not authors: missing_fields.append("authors")
            if not year: missing_fields.append("year")
            if not journal: missing_fields.append("journal")

            if missing_fields:
                raise HTTPException(
                    status_code=400,
                    detail=f"Missing fields: {', '.join(missing_fields)}. "
                           f"Fill in the fields or upload a BibTeX file."
                )
            logger.info("Manual mode")
        else:
            if not all([title, authors, year, journal]):
                missing_from_bibtex = []
                if not title: missing_from_bibtex.append("title")
                if not authors: missing_from_bibtex.append("authors")
                if not year: missing_from_bibtex.append("year")
                if not journal: missing_from_bibtex.append("journal")

                logger.error(f"Missing from BibTeX: {missing_from_bibtex}")
                raise HTTPException(
                    status_code=400,
                    detail=f"Missing fields: {', '.join(missing_from_bibtex)}. "
                )
            logger.info("BibTeX mode")

        if not file:
            raise HTTPException(status_code=400, detail="A file is required")

        allowed_extensions = ['.pdf', '.docx', '.tex', '.latex']
        file_extension = '.' + file.filename.split('.')[-1].lower() if '.' in file.filename else ''
        if file_extension not in allowed_extensions:
            logger.error(f"Extension not allowed: {file_extension}")
            raise HTTPException(
                status_code=400,
                detail=f"Extension not allowed. Supported extensions: {', '.join(allowed_extensions)}"
            )

        content = await file.read()
        logger.info(f"File uploaded: {file.filename} ({len(content)} bytes)")

        try:
            file_ext = FileConverter.get_file_extension(file.filename)
            conversion_method = "none"

            if file_ext == '.docx':
                try:
                    converted_content, final_filename = AdvancedDocxConverter.convert_docx_with_pandoc(
                        content, file.filename
                    )
                    conversion_method = "pandoc"
                    logger.info(f"DOCX converted with pandoc: {file.filename} -> {final_filename}")
                except Exception as pandoc_error:
                    logger.warning(f"Pandoc failed with DOCX: {pandoc_error}, falling back to mammoth")
                    converted_content, final_filename = FileConverter.convert_to_pdf_if_needed(
                        content, file.filename
                    )
                    conversion_method = "mammoth"
                    logger.info(f"DOCX converted with mammoth: {file.filename} -> {final_filename}")
            else:
                converted_content, final_filename = FileConverter.convert_to_pdf_if_needed(
                    content, file.filename
                )
                conversion_method = "standard" if file_ext in ['.tex', '.latex'] else "none"
                logger.info(f"File processed: {file.filename} -> {final_filename}")

        except Exception as e:
            logger.error(f"Error while converting the file: {e}")
            raise HTTPException(
                status_code=500,
                detail=f"Error while converting the file: {str(e)}"
            )

        try:
            text = parser.extract_text(file.filename, content)
            keywords = nlp.extract_keywords(text)
            logger.info(f"{len(keywords)} keywords extracted")
        except Exception as e:
            logger.warning(f"Error while extracting keywords: {e}")
            keywords = []

        author_names = [a.strip() for a in authors.split(",") if a.strip()]
        keyword_names = [k.strip().lower() for k in keywords if k.strip()]

        logger.info(f"Authors to process: {author_names}")
        logger.info(f"Keywords to process: {keyword_names}")

        # Reuse existing authors, create the missing ones
        author_objs = []
        for name in author_names:
            result = await db.execute(select(Author).where(Author.name == name))
            author = result.scalar_one_or_none()
            if not author:
                author = Author(name=name)
                db.add(author)
                await db.flush()
                logger.info(f"New author created: {name}")
            else:
                logger.info(f"Existing author found: {name}")
            author_objs.append(author)

        # Reuse existing keywords, create the missing ones
        keyword_objs = []
        for kw in keyword_names:
            result = await db.execute(select(Keyword).where(Keyword.name == kw))
            keyword = result.scalar_one_or_none()
            if not keyword:
                keyword = Keyword(name=kw)
                db.add(keyword)
                await db.flush()
                logger.info(f"Keyword created: {kw}")
            else:
                logger.info(f"Existing keyword found: {kw}")
            keyword_objs.append(keyword)

        publication = Publication(
            title=title,
            file=converted_content,
            filename=final_filename,
            journal=journal,
            year=year,
            doi=doi,
            user_id=user.id,
            authors=author_objs,
            keywords=keyword_objs
        )
        db.add(publication)
        await db.commit()
        await db.refresh(publication)

        result = await db.execute(
            select(Publication)
            .options(joinedload(Publication.authors), joinedload(Publication.keywords))
            .where(Publication.id == publication.id)
        )
        publication_with_rel = result.unique().scalar_one()

        author_names_response = [author.name for author in publication_with_rel.authors]
        keyword_names_response = [kw.name for kw in publication_with_rel.keywords]

        response_data = {
            "id": publication_with_rel.id,
            "title": publication_with_rel.title,
            "authors": author_names_response,
            "keywords": keyword_names_response,
            "journal": publication_with_rel.journal,
            "year": publication_with_rel.year,
            "doi": publication_with_rel.doi,
            "original_filename": file.filename,
            "converted_filename": final_filename,
            "conversion_method": conversion_method
        }

        if bibtex is not None:
            response_data["metadata_source"] = "bibtex"
            response_data["bibtex_data"] = bibtex_metadata
            logger.info("Saved with BibTeX metadata")
        else:
            response_data["metadata_source"] = "manual"
            logger.info("Saved with manually entered metadata")

        return response_data

    except HTTPException:
        raise
    except Exception as e:
        await db.rollback()
        raise HTTPException(
            status_code=500,
            detail=f"Upload error: {str(e)}"
        )


def is_valid_doi(doi: str) -> bool:
    import re
    doi_pattern = r'^10\.\d{4,}/[-._;()/:\w\[\]]+$'
    return bool(re.match(doi_pattern, doi, re.IGNORECASE))
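A minimal sketch of calling the upload endpoint from a client; the base URL, file path, token and metadata values are all assumptions:

# sketch: upload a PDF with manual metadata; the token comes from /auth/jwt/login
import httpx

token = "..."  # hypothetical JWT obtained earlier
with open("paper.pdf", "rb") as fh:
    resp = httpx.post(
        "http://localhost:8000/upload/",
        headers={"Authorization": f"Bearer {token}"},
        files={"file": ("paper.pdf", fh, "application/pdf")},
        data={"title": "My Paper", "authors": "A. Rossi, B. Bianchi",
              "year": "2024", "journal": "Example Journal"},
    )
print(resp.json())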
56  backend/app/users.py  Normal file
@@ -0,0 +1,56 @@
import uuid
from typing import Optional

from fastapi import Depends, Request
from fastapi_users import BaseUserManager, FastAPIUsers, UUIDIDMixin, models
from fastapi_users.authentication import (
    AuthenticationBackend,
    BearerTransport,
    JWTStrategy,
)
from fastapi_users.db import SQLAlchemyUserDatabase

from app.db import User, get_user_db

# CHANGE ME: replace this secret before deploying
SECRET = "1d90d4315c0a0313fb65211fa82e88129cddedb8b662553fbd38f44be9dc818bbd8623ca0177d965e762ee9727b5f6a2bd98481311ecccbcae846bff4f57b8ce72a51fca3278caa05ff18e54c563788d2a67b44be6fc667c12d1b6c2d869f6637b67025a6aa938e811616f27c160a13dc7b653e56a9823f61a165cdf671f734c"


class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
    reset_password_token_secret = SECRET
    verification_token_secret = SECRET

    async def on_after_register(self, user: User, request: Optional[Request] = None):
        print(f"User {user.id} has registered.")

    async def on_after_forgot_password(
        self, user: User, token: str, request: Optional[Request] = None
    ):
        print(f"User {user.id} has forgotten their password. Reset token: {token}")

    async def on_after_request_verify(
        self, user: User, token: str, request: Optional[Request] = None
    ):
        print(f"Verification requested for user {user.id}. Verification token: {token}")


async def get_user_manager(user_db: SQLAlchemyUserDatabase = Depends(get_user_db)):
    yield UserManager(user_db)


bearer_transport = BearerTransport(tokenUrl="auth/jwt/login")


def get_jwt_strategy() -> JWTStrategy[models.UP, models.ID]:
    return JWTStrategy(secret=SECRET, lifetime_seconds=3600)


auth_backend = AuthenticationBackend(
    name="jwt",
    transport=bearer_transport,
    get_strategy=get_jwt_strategy,
)

fastapi_users = FastAPIUsers[User, uuid.UUID](get_user_manager, [auth_backend])

current_active_user = fastapi_users.current_user(active=True)
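Since the SECRET above is marked CHANGE ME, a minimal sketch of generating a fresh value with the standard library only:

# sketch: generate a random hex secret for JWT signing and reset/verify tokens
import secrets

print(secrets.token_hex(64))  # paste the output into SECRET or load it from an environment variable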
6  backend/app/utils/nlp.py  Normal file
@@ -0,0 +1,6 @@
import yake

def extract_keywords(text: str, num_keywords: int = 5) -> list:
    kw_extractor = yake.KeywordExtractor(lan="en", n=1, top=num_keywords)
    keywords = kw_extractor.extract_keywords(text)
    return [kw for kw, _ in keywords]
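A quick sketch of what the extractor returns; the sample abstract and the printed keywords are only illustrative:

# sketch: extract single-word keywords from a short abstract
from app.utils.nlp import extract_keywords

abstract = "We study graphene-based sensors for low-cost gas detection in ambient conditions."
print(extract_keywords(abstract, num_keywords=3))  # e.g. ['graphene', 'sensors', 'detection']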
165  backend/app/utils/parser.py  Normal file
@@ -0,0 +1,165 @@
import bibtexparser
import io
import logging
from typing import Tuple, Optional
from pdfminer.high_level import extract_text as pdf_extract_text
from pdfminer.high_level import extract_text_to_fp
import tempfile
import os

logger = logging.getLogger(__name__)


def bibtex(bibtex_content: str) -> Tuple[Optional[str], Optional[str], Optional[int], Optional[str], Optional[str]]:
    """
    Extracts title, authors, year, journal and doi from the first record of a BibTeX file.
    Returns a tuple (title, authors, year, journal, doi).
    """
    bib_database = bibtexparser.load(io.StringIO(bibtex_content))
    if not bib_database.entries:
        return (None, None, None, None, None)

    entry = bib_database.entries[0]
    title = entry.get('title')
    authors = entry.get('author') or entry.get('authors')  # standard BibTeX uses 'author'
    year = int(entry['year']) if 'year' in entry else None
    journal = entry.get('journal')
    doi = entry.get('doi')

    return (title, authors, year, journal, doi)


def extract_text(filename: str, content: bytes) -> str:
    """
    Core helper: extracts text from the uploaded file for keyword analysis.

    Args:
        filename: File name (used to determine the type)
        content: File content as bytes

    Returns:
        str: Text extracted from the document
    """
    try:
        # Determine the file extension
        file_extension = os.path.splitext(filename.lower())[1]

        if file_extension == '.pdf':
            return extract_text_from_pdf(content)
        elif file_extension == '.docx':
            return extract_text_from_docx(content)
        elif file_extension in ['.tex', '.latex']:
            return extract_text_from_latex(content)
        else:
            logger.warning(f"File type not supported for text extraction: {file_extension}")
            return ""

    except Exception as e:
        logger.error(f"Error extracting text from {filename}: {e}")
        return ""


def extract_text_from_pdf(pdf_content: bytes) -> str:
    """
    Extracts text from PDF content using pdfminer
    """
    try:
        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as temp_file:
            temp_file.write(pdf_content)
            temp_file.flush()

            # Extract the text using pdfminer
            text = pdf_extract_text(temp_file.name)

            # Clean up the temporary file
            os.unlink(temp_file.name)

            logger.info(f"Extracted PDF text: {len(text)} characters")
            return text or ""

    except Exception as e:
        logger.error(f"Error extracting text from PDF: {e}")
        return ""


def extract_text_from_docx(docx_content: bytes) -> str:
    """
    Extracts text from DOCX content using python-docx
    """
    try:
        from docx import Document

        with tempfile.NamedTemporaryFile(suffix='.docx', delete=False) as temp_file:
            temp_file.write(docx_content)
            temp_file.flush()

            # Extract the text using python-docx
            doc = Document(temp_file.name)
            text_parts = []

            for paragraph in doc.paragraphs:
                text_parts.append(paragraph.text)

            text = '\n'.join(text_parts)

            # Clean up the temporary file
            os.unlink(temp_file.name)

            logger.info(f"Extracted DOCX text: {len(text)} characters")
            return text

    except Exception as e:
        logger.error(f"Error extracting text from DOCX: {e}")
        return ""


def extract_text_from_latex(latex_content: bytes) -> str:
    """
    Extracts text from LaTeX content by stripping LaTeX commands
    """
    try:
        from pylatexenc.latex2text import LatexNodes2Text

        # Decode the content
        latex_text = latex_content.decode('utf-8', errors='ignore')

        # Convert LaTeX to plain text
        converter = LatexNodes2Text()
        text = converter.latex_to_text(latex_text)

        logger.info(f"Extracted LaTeX text: {len(text)} characters")
        return text

    except Exception as e:
        logger.error(f"Error extracting text from LaTeX: {e}")
        # Fallback: manually strip the most common LaTeX commands
        try:
            latex_text = latex_content.decode('utf-8', errors='ignore')
            # Remove basic LaTeX commands
            import re
            text = re.sub(r'\\[a-zA-Z]+\{[^}]*\}', '', latex_text)
            text = re.sub(r'\\[a-zA-Z]+', '', text)
            text = re.sub(r'\{[^}]*\}', '', text)
            text = re.sub(r'%.*', '', text)  # Remove comments
            return text.strip()
        except:
            return ""


def clean_extracted_text(text: str) -> str:
    """
    Cleans the extracted text to improve keyword extraction
    """
    import re

    # Remove control characters and collapse repeated whitespace
    text = re.sub(r'\s+', ' ', text)

    # Remove excessive special characters
    text = re.sub(r'[^\w\s\-.,;:()[\]{}]', ' ', text)

    # Drop very short lines (probably headers/footers)
    lines = text.split('\n')
    clean_lines = [line.strip() for line in lines if len(line.strip()) > 10]

    return '\n'.join(clean_lines).strip()
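A minimal sketch of what parser.bibtex returns for a typical entry; the record below is made up:

# sketch: parse a single BibTeX record into the (title, authors, year, journal, doi) tuple
from app.utils import parser

record = """@article{rossi2024,
  author  = {Rossi, Anna and Bianchi, Bruno},
  title   = {An Example Study},
  journal = {Example Journal},
  year    = {2024},
  doi     = {10.1234/example.2024.001}
}"""
print(parser.bibtex(record))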
14  backend/docker-compose.yml  Normal file
@@ -0,0 +1,14 @@
services:
  db:
    container_name: pg
    image: postgres:15-alpine
    env_file:
      - ./.env
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data

volumes:
  postgres_data:
9  backend/main.py  Normal file
@@ -0,0 +1,9 @@
from dotenv import load_dotenv
import os

load_dotenv()
#from database import save_publication
import uvicorn

if __name__ == "__main__":
    uvicorn.run("app.app:app", host="0.0.0.0", log_level="info")
34  backend/requirements.txt  Normal file
@@ -0,0 +1,34 @@
#backend core
fastapi[all]
fastapi-asyncpg
fastapi-users[sqlalchemy,postgresql]

#database drivers
asyncpg

#parser
python-multipart

#web server
uvicorn

#utils
pdfminer.six
python-docx
pylatexenc
bibtexparser

#NLP
yake

#file conversion
python-docx
reportlab
weasyprint
markdown
mammoth
pypandoc-binary

boto3

python-dotenv