Skip to content
Merged

Dev #510

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 0 additions & 44 deletions .github/workflows/benchmarks.yml

This file was deleted.

26 changes: 17 additions & 9 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,23 @@ jobs:
strategy:
matrix:
os: [t4_gpu, ubuntu-latest, windows-latest]
fail-fast: false
env:
# T4 can't run bf16 in vllm; size for the 16GB card. (No-op on the
# CPU runners, which skip the VLM-backed tests.)
VLLM_DTYPE: float16
VLLM_GPU_TYPE: t4
SURYA_INFERENCE_STARTUP_TIMEOUT: "1200"
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.11
uses: actions/setup-python@v4
- uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
python-version: 3.11
- name: Install python dependencies
run: |
pip install poetry
poetry install
version: latest
enable-cache: true
- name: Set up Python 3.11
run: uv python install 3.11
- name: Install dependencies
run: uv sync --frozen --group dev
- name: Run tests
run: poetry run pytest
run: uv run pytest
2 changes: 1 addition & 1 deletion .github/workflows/cla.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
PERSONAL_ACCESS_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
with:
path-to-signatures: 'signatures/version1/cla.json'
path-to-document: 'https://github.com/VikParuchuri/surya/blob/master/CLA.md'
path-to-document: 'https://github.com/datalab-to/surya/blob/master/CLA.md'
# branch should not be protected
branch: 'master'
allowlist: VikParuchuri
24 changes: 10 additions & 14 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,17 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.11
uses: actions/setup-python@v4
- uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
python-version: 3.11
- name: Install python dependencies
run: |
pip install poetry
poetry install
version: latest
enable-cache: true
- name: Set up Python 3.11
run: uv python install 3.11
- name: Build package
run: |
poetry build
run: uv build
- name: Publish package
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: |
poetry config pypi-token.pypi "$PYPI_TOKEN"
poetry publish
UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: uv publish
41 changes: 22 additions & 19 deletions .github/workflows/scripts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,36 @@ on: [push]
jobs:
build:
runs-on: t4_gpu
env:
# T4 (Turing, compute 7.5) can't run bf16 in vllm; size vllm for the 16GB
# card and give the cold start (image pull + model download) headroom.
VLLM_DTYPE: float16
VLLM_GPU_TYPE: t4
SURYA_INFERENCE_STARTUP_TIMEOUT: "1200"
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.11
uses: actions/setup-python@v4
- uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
python-version: 3.11
- name: Install python dependencies
run: |
pip install poetry
poetry install
version: latest
enable-cache: true
- name: Set up Python 3.11
run: uv python install 3.11
- name: Install dependencies
run: uv sync --frozen --group dev
- name: Download benchmark data
run: |
wget -O benchmark_data.zip "https://drive.google.com/uc?export=download&id=1NHrdYatR1rtqs2gPVfdvO0BAvocH8CJi"
unzip -o benchmark_data.zip
- name: Test detection
run: poetry run surya_detect benchmark_data/pdfs/switch_trans.pdf --page_range 0
run: uv run surya_detect benchmark_data/pdfs/switch_trans.pdf --page_range 0
# Spawn the vllm server once and reuse it across the OCR/layout/table
# steps (--keep_server) instead of paying a cold start three times.
- name: Test OCR
env:
RECOGNITION_MAX_TOKENS: 25
run: poetry run surya_ocr benchmark_data/pdfs/switch_trans.pdf --page_range 0
run: uv run surya_ocr benchmark_data/pdfs/switch_trans.pdf --page_range 0 --keep_server
- name: Test layout
run: poetry run surya_layout benchmark_data/pdfs/switch_trans.pdf --page_range 0
run: uv run surya_layout benchmark_data/pdfs/switch_trans.pdf --page_range 0 --keep_server
- name: Test table
run: poetry run surya_table benchmark_data/pdfs/switch_trans.pdf --page_range 0
- name: Test texify
env:
TEXIFY_MAX_TOKENS: 25
run: poetry run surya_latex_ocr benchmark_data/pdfs/switch_trans.pdf --page_range 0
run: uv run surya_table benchmark_data/pdfs/switch_trans.pdf --page_range 0 --keep_server
- name: Test detection folder
run: poetry run surya_detect benchmark_data/pdfs --page_range 0
run: uv run surya_detect benchmark_data/pdfs --page_range 0
6 changes: 3 additions & 3 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ authors:
given-names: Vikas
- name: Datalab Team
date-released: 2025-05-13
url: https://github.com/VikParuchuri/surya
version: 0.14.0
repository-code: https://github.com/VikParuchuri/surya
url: https://github.com/datalab-to/surya
version: 1.0.0
repository-code: https://github.com/datalab-to/surya
Loading
Loading