I wanted to explore the DALL-E 2 diffusion model API along with combinatorial generation algorithms and visualization techniques to create a semi-random collage of latent space manipulations of American Gothic. This experiment involved leveraging both the DALL-E 2 image synthesis capabilities and the GPT-3 language model for style prompt generation, creating a multi-stage generative pipeline. The implementation demonstrates inpainting techniques through masked image editing, where specific spatial regions are reconstructed based on textual conditioning. While developing this computational approach, I utilized ChatGPT for matplotlib visualization assistance, adding a recursive layer of generative model interaction to the creative process. Next, I plan to explore the Whisper automatic speech recognition model from OpenAI.
Import Necessary Libraries and Define a Few Helper Functions
import os
import openai
import itertools
import random
from PIL import Image
import requests
import uuid
import os
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
# Read the API key from the environment; a missing OPENAI_API_KEY raises KeyError here.
openai.api_key = os.environ["OPENAI_API_KEY"]
# List the available models. The return value is not stored — presumably this is
# only a quick check that the credentials work (confirm).
openai.Model.list()
# Functions
def ask_gpt(prompt):
    """Send ``prompt`` to the completions endpoint and echo the reply.

    The whitespace-stripped text of the first choice is printed. The full,
    unmodified response object is returned so callers can read ``choices``
    themselves.
    """
    completion_kwargs = {
        "engine": "text-davinci-003",  # use the latest model available to you
        "prompt": prompt,
        "max_tokens": 200,
    }
    response = openai.Completion.create(**completion_kwargs)
    print(response.choices[0].text.strip())
    return response
# make a uniqueish string to label images as they are generated
def generate_uniqueish_string():
    """Return the first 8 hex characters of a fresh random UUID.

    Short enough for filenames; collisions are unlikely but not impossible,
    hence "uniqueish".
    """
    token = uuid.uuid4()
    return token.hex[:8]
def process_dalle_images(response, filename, image_dir, i, hash=True):
    """Download every image referenced by an image API response and save it.

    Args:
        response: Mapping with a ``"data"`` list whose items each carry a
            ``"url"`` key.
        filename: Prefix used for every saved file.
        image_dir: Directory the PNG files are written to (must exist).
        i: Zero-based iteration index of the caller; stored 1-based in the
            filename.
        hash: Unused; kept so existing callers keep working.

    Returns:
        List of file paths written, in the order of ``response["data"]``.

    Raises:
        requests.HTTPError: If a download returns an error status, so an
            error page is never saved as a ``.png``.
    """
    # One id per call groups all variations that came from the same response.
    uid = generate_uniqueish_string()
    filepaths = []
    for j, datum in enumerate(response["data"]):
        download = requests.get(datum["url"])
        download.raise_for_status()
        # The variation index keeps names distinct; without it every image in
        # a multi-image response was written to the same path.
        image_name = f"{filename}_{i + 1}_{uid}_{j + 1}.png"
        filepath = os.path.join(image_dir, image_name)
        with open(filepath, "wb") as image_file:
            image_file.write(download.content)
        filepaths.append(filepath)
    return filepaths
# Directory that DALL·E images are saved to, relative to the working directory.
image_dir_name = "images"
image_dir = os.path.join(os.curdir, image_dir_name)
# Create the directory if needed; exist_ok avoids the check-then-create race
# and matches how the mask directory is created further down.
os.makedirs(image_dir, exist_ok=True)
# print the directory to save to
print(f"{image_dir=}")
def top_half_mask(width, height, mask_dir, mask_name):
    """Save an RGBA mask whose top half is fully transparent.

    Rows ``0`` to ``height // 2 - 1`` get alpha 0; all other pixels keep the
    base colour ``(0, 0, 0, 1)``. The file is written to
    ``os.path.join(mask_dir, mask_name)`` and nothing is returned.
    """
    # NOTE(review): base alpha is 1 (non-zero but nearly transparent), not 255 — confirm intended.
    mask = Image.new("RGBA", (width, height), (0, 0, 0, 1))
    # A single region fill replaces the per-pixel putpixel loop; the box is
    # (left, upper, right, lower) with exclusive right/lower edges.
    mask.paste((0, 0, 0, 0), (0, 0, width, height // 2))
    mask.save(os.path.join(mask_dir, mask_name))
def bottom_half_mask(width, height, mask_dir, mask_name):
    """Save an RGBA mask whose bottom half is fully transparent.

    Rows ``height // 2`` to ``height - 1`` get alpha 0; all other pixels keep
    the base colour ``(0, 0, 0, 1)``. The file is written to
    ``os.path.join(mask_dir, mask_name)`` and nothing is returned.
    """
    # NOTE(review): base alpha is 1 (non-zero but nearly transparent), not 255 — confirm intended.
    mask = Image.new("RGBA", (width, height), (0, 0, 0, 1))
    # A single region fill replaces the per-pixel putpixel loop; the box is
    # (left, upper, right, lower) with exclusive right/lower edges.
    mask.paste((0, 0, 0, 0), (0, height // 2, width, height))
    mask.save(os.path.join(mask_dir, mask_name))
def left_half_mask(width, height, mask_dir, mask_name):
    """Save an RGBA mask whose left half is fully transparent.

    Columns ``0`` to ``width // 2 - 1`` get alpha 0; all other pixels keep
    the base colour ``(0, 0, 0, 1)``. The file is written to
    ``os.path.join(mask_dir, mask_name)`` and nothing is returned.
    """
    # NOTE(review): base alpha is 1 (non-zero but nearly transparent), not 255 — confirm intended.
    mask = Image.new("RGBA", (width, height), (0, 0, 0, 1))
    # A single region fill replaces the per-pixel putpixel loop; the box is
    # (left, upper, right, lower) with exclusive right/lower edges.
    mask.paste((0, 0, 0, 0), (0, 0, width // 2, height))
    mask.save(os.path.join(mask_dir, mask_name))
def right_half_mask(width, height, mask_dir, mask_name):
    """Save an RGBA mask whose right half is fully transparent.

    Columns ``width // 2`` to ``width - 1`` get alpha 0; all other pixels
    keep the base colour ``(0, 0, 0, 1)``. The file is written to
    ``os.path.join(mask_dir, mask_name)`` and nothing is returned.
    """
    # NOTE(review): base alpha is 1 (non-zero but nearly transparent), not 255 — confirm intended.
    mask = Image.new("RGBA", (width, height), (0, 0, 0, 1))
    # A single region fill replaces the per-pixel putpixel loop; the box is
    # (left, upper, right, lower) with exclusive right/lower edges.
    mask.paste((0, 0, 0, 0), (width // 2, 0, width, height))
    mask.save(os.path.join(mask_dir, mask_name))
# Rebind image_dir to a literal path; this replaces the value built earlier
# with os.path.join(os.curdir, image_dir_name).
image_dir='./images'
Show the Image We Are Working With
# Path of the source image; the same path is later opened as the `image`
# argument of every edit request.
american_gothic = "images/American_Gothic_Square.png"
im = Image.open(american_gothic)
# `display` is not imported in this file — presumably the IPython/Jupyter
# builtin, so this line only works inside a notebook (confirm).
display(im)
Leverage Large Language Models for Style Prompt Generation
# Ask the language model for a numbered list of art styles.
question = "provide a python list of 15 distinct art styles (i.e. impressionist, cubist, pointlist, photorealistic, japanese wood block print)"
# The prompt is just the question; kept as its own name for later cells.
prompt = question
gpt_output = ask_gpt(prompt)
# Parse the output: each useful line is expected to look like "3. Pointillism".
art_styles_string = gpt_output.choices[0]['text']
art_styles = []
for line in art_styles_string.splitlines():
    # Split on the first ". " only, so a style name that itself contains
    # ". " is kept whole. Lines without a numbered prefix (blank lines, stray
    # commentary from the model) are skipped instead of raising IndexError.
    _, separator, style = line.strip().partition('. ')
    style = style.strip()
    if separator and style:
        art_styles.append(style)
# choose the length of combinations you want, for example 2
length_of_combinations = 2
style_combinations = list(itertools.combinations(art_styles, length_of_combinations))
1. Impressionism
2. Cubism
3. Pointillism
4. Photorealism
5. Japanese Wood Block Print
6. Expressionism
7. Constructivism
8. Abstract Expressionism
9. Surrealism
10. Baroque
11. Realism
12. Neo-Impressionism
13. Art Deco
14. Cubo-Futurism
15. Op Art
# Turn every pair of styles into the prompt text sent to DALL·E
formatted_strings = [
    f"in the style of {first_style} and {second_style}"
    for first_style, second_style in style_combinations
]
# TODO would be better to print at random from the list to actually show
# Preview the first ten prompts
for preview in formatted_strings[:10]:
    print(preview)
in the style of Impressionism and Cubism .
in the style of Impressionism and Pointillism .
in the style of Pointillism and Surrealism .
in the style of Cubism and Baroque .
Create Masks
mask_dir = "images/masks"
# Create the directory (and the parent "images" directory if it is missing);
# exist_ok makes this safe to re-run and avoids the check-then-create race.
os.makedirs(mask_dir, exist_ok=True)
# print the directory to save to
print(f"{mask_dir=}")
# TODO ask what are the actual image sizes
width = 574
height = 574
mask_dir = "./masks"
os.makedirs(mask_dir, exist_ok=True)  # ensure the directory exists
# Write one mask file per half of the image, in the same order as before.
for make_mask, mask_name in (
    (top_half_mask, "top_half_mask.png"),
    (bottom_half_mask, "bottom_half_mask.png"),
    (left_half_mask, "left_half_mask.png"),
    (right_half_mask, "right_half_mask.png"),
):
    make_mask(width, height, mask_dir, mask_name)
# Directory that receives the edited (inpainted) images
edit_image_dir = os.path.join("images", "edits")
mask_dir = 'images/masks'
# Create it, including missing parents; a no-op when it already exists
os.makedirs(edit_image_dir, exist_ok=True)
Execute DALL-E 2 Inpainting with Stochastic Mask Selection and Style Conditioning
# TODO: implement metadata embedding for tracking conditioning parameters in generated samples
# Directory the mask PNGs were written to
mask_dir = "./masks/"
num_iterations = 4  # number of edit requests to make
# All mask files available for random selection
masks = [f for f in os.listdir(mask_dir) if f.endswith('.png')]
for i in range(num_iterations):
    # Pick a random mask and a random style prompt for this request
    selected_mask = random.choice(masks)
    mask_filepath = os.path.join(mask_dir, selected_mask)
    selected_prompt = random.choice(formatted_strings)
    # Open both files in a context manager so the handles are closed after
    # each request instead of leaking one pair per iteration.
    # NOTE(review): masks are generated at 574x574 while size is "512x512" —
    # confirm the source image and mask dimensions match what the API expects.
    with open(american_gothic, "rb") as image_file, open(mask_filepath, "rb") as mask_file:
        edit_response = openai.Image.create_edit(
            image=image_file,  # source image
            mask=mask_file,  # transparent pixels mark the region to repaint
            prompt=selected_prompt,  # style description
            n=1,  # images per request
            size="512x512",  # output resolution
            response_format="url",
        )
    # print response for prototype / debug
    # print(edit_response)
    edit_filepaths = process_dalle_images(edit_response, "edits", edit_image_dir, i, hash=True)
Visualization of Generated Sample Distribution via Grid Assembly
# Directory holding the generated edits
image_directory = "images/edits/"
image_files = [f for f in os.listdir(image_directory) if f.endswith('.png')]
# Fail early with a clear message: with no images, the repeat-to-fill step
# below could never reach the required count (the old doubling loop spun forever).
if not image_files:
    raise FileNotFoundError(f"no .png images found in {image_directory!r}")
# shuffle image file order to get different images in the plot
image_files = np.random.permutation(image_files)
# Load all the images
images = [Image.open(os.path.join(image_directory, f)) for f in image_files]
# Repeat the shuffled images in order until the grid is full, then stop;
# this yields the same sequence as doubling the list and slicing to 100.
grid_size = 10
images = list(itertools.islice(itertools.cycle(images), grid_size * grid_size))
# Create a 10x10 grid of subplots
fig, axes = plt.subplots(grid_size, grid_size, figsize=(18, 18))
# axes.flat walks the grid row by row, matching index i * 10 + j
for ax, img in zip(axes.flat, images):
    # Remove the axes for each subplot
    ax.axis('off')
    # Display the image on the subplot
    ax.imshow(np.array(img), aspect='auto')
# Adjust the space between the subplots
# Negative values for wspace and hspace will make the images overlap
plt.subplots_adjust(wspace=-0.05, hspace=-0.05)
# Show the plot
plt.show()