init commit

62590c76 · duanjinfei · 62590c76 · 62590c76 · 62590c76 · 62590c76
Commit 62590c76 authored Jul 15, 2024 by duanjinfei
62 changed files
--- a/.dockerignore
+++ b/.dockerignore
+# The .dockerignore file excludes files from the container build process.
+#
+# https://docs.docker.com/engine/reference/builder/#dockerignore-file
+# Exclude Git files
+.git
+.github
+.gitignore
+# Exclude Python cache files
+__pycache__
+.mypy_cache
+.pytest_cache
+.ruff_cache
+# Exclude Python virtual environment
+/venv
--- a/.github/workflows/push.yaml
+++ b/.github/workflows/push.yaml
+name: Push to Replicate
+on:
+  # Workflow dispatch allows you to manually trigger the workflow from GitHub.com
+  # Go to your repo, click "Actions", click "Push to Replicate", click "Run workflow"
+  workflow_dispatch:
+    inputs:
+      model_name:
+        description: 'Enter the model name, like "alice/bunny-detector". If unset, this will default to the value of `image` in cog.yaml.'
+  # # Uncomment these lines to trigger the workflow on every push to the main branch
+  # push:
+  #   branches:
+  #     - main
+jobs:
+  push_to_replicate:
+    name: Push to Replicate
+    # If your model is large, the default GitHub Actions runner may not 
+    # have enough disk space. If you need more space you can set up a 
+    # bigger runner on GitHub.
+    runs-on: ubuntu-latest
+    steps:
+      # This action cleans up disk space to make more room for your
+      # model code, weights, etc.
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@v1.3.1
+        with:
+          tool-cache: false
+          docker-images: false
+      - name: Checkout
+        uses: actions/checkout@v4
+      # This action installs Docker buildx and Cog (and optionally CUDA)
+      - name: Setup Cog
+        uses: replicate/setup-cog@v2
+        with:
+          # If you set REPLICATE_API_TOKEN in your GitHub repository secrets,
+          # the action will authenticate with Replicate automatically so you
+          # can push your model
+          token: ${{ secrets.REPLICATE_API_TOKEN }}
+      # If you trigger the workflow manually, you can specify the model name.
+      # If you leave it blank (or if the workflow is triggered by a push), the 
+      # model name will be derived from the `image` value in cog.yaml.
+      - name: Push to Replicate
+        run: |
+          if [ -n "${{ inputs.model_name }}" ]; then
+            cog push r8.im/${{ inputs.model_name }}
+          else
+            cog push
+          fi
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
+{
+  "python.analysis.typeCheckingMode": "basic"
+}
\ No newline at end of file
--- a/cog.yaml
+++ b/cog.yaml
+# Configuration for Cog ⚙️
+# Reference: https://cog.run/yaml
+build:
+  # set to true if your model requires a GPU
+  gpu: true
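+  # a list of ubuntu apt packages to install
+  # NOTE(review): the entries below ("defaults", "pip=23.0.1", "cudatoolkit=11.7", ...) look like conda specs rather than apt package names — confirm they belong under system_packages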
+  system_packages:
+    - pytorch
+    - defaults
+    - pip=23.0.1
+    - cudatoolkit=11.7
+    - numpy=1.23.3
+    - cython==0.29.33
+  # python version in the form '3.11' or '3.11.4'
+  python_version: "3.10.6"
+  # a list of packages in the format <package-name>==<version>
+  python_packages:
+    - Pillow==9.5.0
+    - gradio==3.50.0
+    - albumentations==0.4.3
+    - opencv-python==4.7.0.72
+    - imageio==2.9.0
+    - imageio-ffmpeg==0.4.2
+    - pytorch-lightning==1.5.0
+    - omegaconf==2.2.3
+    - test-tube==0.7.5
+    - streamlit==1.20.0
+    - einops==0.4.1
+    - transformers==4.30.2
+    - webdataset==0.2.5
+    - kornia==0.6.7
+    - open_clip_torch==2.7.0
+    - torchmetrics==0.11.4
+    - timm==0.6.7
+    - addict==2.4.0
+    - yapf==0.32.0
+    - safetensors==0.4.0
+    - basicsr==1.4.2
+    - jieba==0.42.1
+    - modelscope==1.10.0
+    - tensorflow==2.14.0
+    - torch==2.0.1
+    - torchvision==0.15.2
+    - easydict==1.10
+    - xformers==0.0.20
+    - subword-nmt==0.3.8
+    - sacremoses==0.0.53
+    - sentencepiece==0.1.99
+    - fsspec
+    - diffusers==0.10.2
+    - ujson
+  # commands run after the environment is set up
+  run:
+    - pip install Pillow==9.5.0
+    - pip install gradio==3.50.0
+    - "echo env is ready!"
+    - "echo another command if needed"
+# predict.py defines how predictions are run on your model
+predict: "predict.py:Predictor"
--- a/dataset_util.py
+++ b/dataset_util.py
+import ujson
+import json
+import pathlib
+__all__ = ['load', 'save', 'show_bbox_on_image']
+def load(file_path: str):
+    """Read a file, picking the reader from the file extension.
+
+    ``.txt`` and ``.list`` are read with ``load_txt``; ``.json`` is read with
+    ``load_json``. Any other suffix fails the assertion.
+    """
+    path = pathlib.Path(file_path)
+    readers = {'.json': load_json, '.txt': load_txt, '.list': load_txt}
+    extension = path.suffix
+    assert extension in readers
+    reader = readers[extension]
+    return reader(path)
+def load_txt(file_path: str):
+    """Return the lines of a UTF-8 text file as a list of cleaned strings.
+
+    Each line is stripped of surrounding whitespace, then of U+FEFF, then of
+    the three characters U+00EF, U+00BB and U+00BF at either end.
+    """
+    cleaned = []
+    with open(file_path, 'r', encoding='utf8') as handle:
+        for raw in handle:
+            line = raw.strip()
+            line = line.strip('\ufeff')
+            line = line.strip('\xef\xbb\xbf')
+            cleaned.append(line)
+    return cleaned
+def load_json(file_path: str):
+    """Parse the JSON file at ``file_path`` with ``ujson`` and return the result.
+
+    The file is read as raw bytes and handed to ``ujson.loads`` unchanged.
+    """
+    with open(file_path, 'rb') as stream:
+        raw_bytes = stream.read()
+    parsed = ujson.loads(raw_bytes)
+    return parsed
+def save(data, file_path):
+    """Write ``data`` to ``file_path``, picking the writer from the extension.
+
+    ``.txt`` is written with ``save_txt`` and ``.json`` with ``save_json``.
+    Any other suffix fails the assertion.
+    """
+    path = pathlib.Path(file_path)
+    writers = {'.json': save_json, '.txt': save_txt}
+    extension = path.suffix
+    assert extension in writers
+    writer = writers[extension]
+    return writer(data, path)
+def save_txt(data, file_path):
+    """Write ``data`` to a UTF-8 text file, one item per line.
+
+    Args:
+        data: a list of items, or a single item (wrapped into a one-element
+            list). Items are converted with ``str()`` before writing.
+        file_path: destination path; an existing file is overwritten.
+
+    No trailing newline is written after the last item.
+    """
+    if not isinstance(data, list):
+        data = [data]
+    # Build the text before opening the file: str() lets non-string items
+    # (e.g. numbers) be written instead of crashing join(), and a failure
+    # here can no longer leave a truncated file behind.
+    text = '\n'.join(str(item) for item in data)
+    with open(file_path, mode='w', encoding='utf8') as f:
+        f.write(text)
+def save_json(data, file_path):
+    """Serialise ``data`` as JSON into ``file_path`` (UTF-8).
+
+    Output is indented by four spaces and non-ASCII characters are written
+    as-is rather than escaped.
+    """
+    dump_options = {'ensure_ascii': False, 'indent': 4}
+    with open(file_path, 'w', encoding='utf-8') as out:
+        json.dump(data, out, **dump_options)
+def show_bbox_on_image(image, polygons=None, txt=None, color=None, font_path='./font/Arial_Unicode.ttf'):
+    """Draw closed polygon outlines, and optional labels, on an RGB version of ``image``.
+
+    Args:
+        image: PIL image; drawing happens on the result of ``image.convert('RGB')``.
+        polygons: iterable of polygons. For each entry, ``entry[0]`` is used as
+            the point sequence and ``entry[0][k]`` as an ``(x, y)`` pair.
+            ``None`` (the default) draws nothing.
+        txt: optional sequence of labels, matched to ``polygons`` by index.
+            ``None`` or an empty sequence disables labels; polygons without a
+            matching label are outlined but not labelled.
+        color: outline colour; defaults to red ``(255, 0, 0)``.
+        font_path: TrueType font file, loaded only when labels are drawn.
+
+    Returns:
+        The annotated RGB image.
+    """
+    # NOTE(review): this commit lists the font as font/Arial-Unicode.ttf (hyphen),
+    # while the default above uses an underscore — confirm which path exists.
+    from PIL import ImageDraw, ImageFont
+    image = image.convert('RGB')
+    draw = ImageDraw.Draw(image)
+    # The signature defaults are None; treat them as "nothing to draw"
+    # instead of failing on len(None) / enumerate(None).
+    if polygons is None:
+        polygons = []
+    if txt is not None and len(txt) == 0:
+        txt = None
+    if color is None:
+        color = (255, 0, 0)
+    font = ImageFont.truetype(font_path, 20) if txt is not None else None
+    for i, entry in enumerate(polygons):
+        box = entry[0]
+        n_points = len(box)
+        if n_points == 0:
+            continue
+        if txt is not None and i < len(txt):
+            # Label is offset 20 px right of and 20 px above the first vertex.
+            draw.text((int(box[0][0]) + 20, int(box[0][1]) - 20),
+                      str(txt[i]), fill='red', font=font)
+        # The modulo wraps the last vertex back to the first, closing the outline.
+        for j in range(n_points):
+            start = box[j]
+            end = box[(j + 1) % n_points]
+            draw.line((start[0], start[1], end[0], end[1]), fill=color, width=2)
+    return image
+def show_glyphs(glyphs, name):
+    """Tile the slices of ``glyphs`` side by side and write them to ``name``.
+
+    A 64-pixel-high canvas with room for 20 cells of width 64, separated by
+    5-pixel gaps, is filled with 0.5. Slice ``i`` along the last axis of
+    ``glyphs`` is copied into cell ``i``, and the canvas is written via
+    ``cv2.imwrite`` after scaling by 255.
+    """
+    import numpy as np
+    import cv2
+    cell, spacing, slots = 64, 5, 20
+    canvas_width = cell * slots + spacing * (slots - 1)
+    canvas = np.ones((cell, canvas_width, 1)) * 0.5
+    stride = cell + spacing
+    for idx in range(glyphs.shape[-1]):
+        left = idx * stride
+        canvas[:, left:left + cell, :] = glyphs[..., idx:idx + 1]
+    cv2.imwrite(name, canvas * 255)
--- a/example_images/banner.png
+++ b/example_images/banner.png
--- a/example_images/edit1.png
+++ b/example_images/edit1.png
--- a/example_images/edit10.png
+++ b/example_images/edit10.png
--- a/example_images/edit11.png
+++ b/example_images/edit11.png
--- a/example_images/edit12.png
+++ b/example_images/edit12.png
--- a/example_images/edit13.png
+++ b/example_images/edit13.png
--- a/example_images/edit14.png
+++ b/example_images/edit14.png
--- a/example_images/edit15.png
+++ b/example_images/edit15.png
--- a/example_images/edit16.png
+++ b/example_images/edit16.png
--- a/example_images/edit2.png
+++ b/example_images/edit2.png
--- a/example_images/edit3.png
+++ b/example_images/edit3.png
--- a/example_images/edit4.png
+++ b/example_images/edit4.png
--- a/example_images/edit5.png
+++ b/example_images/edit5.png
--- a/example_images/edit6.png
+++ b/example_images/edit6.png
--- a/example_images/edit7.png
+++ b/example_images/edit7.png
--- a/example_images/edit8.png
+++ b/example_images/edit8.png
--- a/example_images/edit9.png
+++ b/example_images/edit9.png
--- a/example_images/gen1.png
+++ b/example_images/gen1.png
--- a/example_images/gen10.png
+++ b/example_images/gen10.png
--- a/example_images/gen11.png
+++ b/example_images/gen11.png
--- a/example_images/gen12.png
+++ b/example_images/gen12.png
--- a/example_images/gen13.png
+++ b/example_images/gen13.png
--- a/example_images/gen14.png
+++ b/example_images/gen14.png
--- a/example_images/gen15.png
+++ b/example_images/gen15.png
--- a/example_images/gen16.png
+++ b/example_images/gen16.png
--- a/example_images/gen17.png
+++ b/example_images/gen17.png
--- a/example_images/gen18.png
+++ b/example_images/gen18.png
--- a/example_images/gen19.png
+++ b/example_images/gen19.png
--- a/example_images/gen2.png
+++ b/example_images/gen2.png
--- a/example_images/gen20.png
+++ b/example_images/gen20.png
--- a/example_images/gen21.png
+++ b/example_images/gen21.png
--- a/example_images/gen3.png
+++ b/example_images/gen3.png
--- a/example_images/gen4.png
+++ b/example_images/gen4.png
--- a/example_images/gen5.png
+++ b/example_images/gen5.png
--- a/example_images/gen6.png
+++ b/example_images/gen6.png
--- a/example_images/gen7.png
+++ b/example_images/gen7.png
--- a/example_images/gen8.png
+++ b/example_images/gen8.png
--- a/example_images/gen9.png
+++ b/example_images/gen9.png
--- a/example_images/ref1.jpg
+++ b/example_images/ref1.jpg
--- a/example_images/ref10.jpg
+++ b/example_images/ref10.jpg
--- a/example_images/ref11.jpg
+++ b/example_images/ref11.jpg
--- a/example_images/ref12.png
+++ b/example_images/ref12.png
--- a/example_images/ref13.jpg
+++ b/example_images/ref13.jpg
--- a/example_images/ref14.png
+++ b/example_images/ref14.png
--- a/example_images/ref15.jpeg
+++ b/example_images/ref15.jpeg
--- a/example_images/ref16.jpeg
+++ b/example_images/ref16.jpeg
--- a/example_images/ref2.jpg
+++ b/example_images/ref2.jpg
--- a/example_images/ref3.jpg
+++ b/example_images/ref3.jpg
--- a/example_images/ref4.jpg
+++ b/example_images/ref4.jpg
--- a/example_images/ref5.jpg
+++ b/example_images/ref5.jpg
--- a/example_images/ref6.jpg
+++ b/example_images/ref6.jpg
--- a/example_images/ref7.jpg
+++ b/example_images/ref7.jpg
--- a/example_images/ref8.jpg
+++ b/example_images/ref8.jpg
--- a/example_images/ref9.jpg
+++ b/example_images/ref9.jpg
--- a/font/Arial-Unicode.ttf
+++ b/font/Arial-Unicode.ttf
--- a/predict.py
+++ b/predict.py
+# Prediction interface for Cog ⚙️
+# https://cog.run/python
+from cog import BasePredictor, Input, Path
+from typing import List
+from modelscope.pipelines import pipeline
+# from util import save_images
+# Folder name reported in Predictor.predict's completion message.
+img_save_folder = "SaveImages"
+# Extra keyword arguments forwarded to every pipeline call in Predictor.predict.
+# NOTE(review): the meaning of each key is defined by the pipeline, not here —
+# confirm against the AnyText pipeline before changing values.
+params = {
+    "show_debug": True,
+    "image_count": 2,
+    "ddim_steps": 20,
+}
+class Predictor(BasePredictor):
+    """Cog predictor that wraps the ModelScope AnyText pipeline."""
+    def setup(self) -> None:
+        """Build the ModelScope pipeline once and keep it on ``self.model``."""
+        self.model = pipeline('my-anytext-task',
+                              model='damo/cv_anytext_text_generation_editing', model_revision='v1.1.3')
+    def predict(
+        self,
+        mode: str = Input(description="Select model type", default="text-generation", choices=[
+            "text-generation", "text-editing"]),
+        prompt: str = Input(description="Input prompt",
+                            default='photo of caramel macchiato coffee on the table, top-down perspective, with "Any" "Text" written on it using cream'),
+        # The original bounds (ge=66273235, le=0) could never be satisfied.
+        # NOTE(review): the upper bound is a placeholder chosen to admit the
+        # 8-digit default — confirm the range the pipeline accepts.
+        seed: int = Input(description="", default=66273235,
+                          ge=0, le=99999999),
+        draw_pos: Path = Input(
+            description="", default='example_images/gen9.png'),
+        ori_image: Path = Input(
+            description="", default='example_images/ref7.jpg'),
+        use_fp32: bool = Input(description="", default=False),
+        no_translator: bool = Input(description="", default=False),
+        # NOTE(review): this commit lists the font as font/Arial-Unicode.ttf
+        # (hyphen) — confirm this default points at an existing file.
+        font_path: str = Input(
+            description="", default='font/Arial_Unicode.ttf'),
+        model_path: str = Input(description="", default=None)
+    ) -> List[Path]:
+        """Run one pipeline call and return the generated images as files.
+
+        The inputs are packed into a dict and passed to ``self.model`` together
+        with ``mode`` and the module-level ``params``. A warning from the
+        pipeline is printed but does not discard the results. When the return
+        code is non-negative, each result is written as a JPEG into a fresh
+        sub-directory of ``img_save_folder`` and its path is returned.
+
+        Returns:
+            Paths of the written images; an empty list when the return code
+            is negative or there are no results.
+        """
+        import os
+        import tempfile
+        import cv2
+        input_data = {
+            "prompt": prompt,
+            "seed": seed,
+            # Plain strings rather than cog Path objects.
+            # NOTE(review): presumably the pipeline expects file paths as str — confirm.
+            "draw_pos": str(draw_pos) if draw_pos is not None else None,
+            "ori_image": str(ori_image) if ori_image is not None else None,
+            "use_fp32": use_fp32,
+            "no_translator": no_translator,
+            "font_path": font_path,
+            "model_path": model_path
+        }
+        results, rtn_code, rtn_warning, _debug_info = self.model(
+            input_data, mode=mode, **params)
+        if rtn_warning:
+            # Success is signalled by rtn_code; a warning alone should not drop results.
+            print(rtn_warning)
+        if rtn_code < 0 or results is None:
+            return []
+        os.makedirs(img_save_folder, exist_ok=True)
+        # A fresh sub-directory per call keeps predictions from overwriting each other.
+        out_dir = tempfile.mkdtemp(dir=img_save_folder)
+        files = []
+        for image_number, img in enumerate(results, start=1):
+            save_path = os.path.join(out_dir, f"{image_number}.jpg")
+            # Mirrors util.save_images: the last axis is reversed before writing.
+            # NOTE(review): assumes each result is an RGB image array — confirm.
+            cv2.imwrite(save_path, img[..., ::-1])
+            files.append(Path(save_path))
+        print(f'Done, result images are saved in: {img_save_folder}')
+        return files
--- a/util.py
+++ b/util.py
+import datetime
+import os
+import cv2
+def save_images(img_list, folder):
+    """Write each image in ``img_list`` as a JPEG under ``folder/<YYYY-MM-DD>/``.
+
+    Files are named ``<HH_MM_SS>_<n>.jpg`` with ``n`` counting from 1; the date
+    and time come from a single timestamp taken at the start of the call.
+
+    Args:
+        img_list: iterable of image arrays; each is written with its last
+            axis reversed.
+        folder: root output directory, created if missing.
+    """
+    now = datetime.datetime.now()
+    folder_path = os.path.join(folder, now.strftime("%Y-%m-%d"))
+    # One call creates both directory levels, and exist_ok avoids the race
+    # between an existence check and the creation.
+    os.makedirs(folder_path, exist_ok=True)
+    time_str = now.strftime("%H_%M_%S")
+    for image_number, img in enumerate(img_list, start=1):
+        filename = f"{time_str}_{image_number}.jpg"
+        save_path = os.path.join(folder_path, filename)
+        # NOTE(review): reversing the last axis presumably converts RGB to the
+        # BGR order OpenCV writes — confirm the channel order of the inputs.
+        cv2.imwrite(save_path, img[..., ::-1])
+def check_channels(image):
+    """Return ``image`` with at most three channels.
+
+    An image whose shape is not three-dimensional, or whose third dimension
+    is 1, is passed through ``cv2.cvtColor`` with ``COLOR_GRAY2BGR``. An image
+    with more than three channels keeps only the first three. Anything else
+    is returned unchanged.
+    """
+    n_channels = 1 if len(image.shape) != 3 else image.shape[2]
+    if n_channels == 1:
+        return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
+    if n_channels > 3:
+        return image[:, :, :3]
+    return image
+def resize_image(img, max_length=768):
+    """Shrink ``img`` to fit ``max_length`` and snap both sides to multiples of 64.
+
+    If the longer side exceeds ``max_length`` the image is first scaled down,
+    keeping its aspect ratio, so that side equals ``max_length``. Both sides
+    are then rounded down to a multiple of 64, with a minimum of 64.
+
+    Args:
+        img: image array; ``img.shape[:2]`` is read as ``(height, width)``.
+        max_length: largest allowed side length before snapping.
+
+    Returns:
+        The resized image as returned by ``cv2.resize``.
+    """
+    height, width = img.shape[:2]
+    max_dimension = max(height, width)
+    if max_dimension > max_length:
+        scale_factor = max_length / max_dimension
+        # Clamp to 1: a very elongated image could otherwise round to 0,
+        # which cv2.resize rejects.
+        new_width = max(1, int(round(width * scale_factor)))
+        new_height = max(1, int(round(height * scale_factor)))
+        img = cv2.resize(img, (new_width, new_height))
+    height, width = img.shape[:2]
+    # Sides shorter than 64 would round down to 0; use 64 as the floor instead.
+    snapped_width = max(64, width - (width % 64))
+    snapped_height = max(64, height - (height % 64))
+    img = cv2.resize(img, (snapped_width, snapped_height))
+    return img