
Commit a054fae

damian (damian0815) authored and committed
attention maps and tokens being sent to web UI
1 parent 33182af commit a054fae

File tree

backend/invoke_ai_web_server.py
ldm/generate.py
ldm/invoke/CLI.py
ldm/invoke/conditioning.py
ldm/invoke/generator/base.py

5 files changed: 60 additions, 45 deletions

backend/invoke_ai_web_server.py

Lines changed: 8 additions & 5 deletions
@@ -18,8 +18,9 @@
 from uuid import uuid4
 from threading import Event

+from ldm.generate import Generate
 from ldm.invoke.args import Args, APP_ID, APP_VERSION, calculate_init_img_hash
-from ldm.invoke.conditioning import get_tokens_for_prompt
+from ldm.invoke.conditioning import get_tokens_for_prompt, get_prompt_structure
 from ldm.invoke.pngwriter import PngWriter, retrieve_metadata
 from ldm.invoke.prompt_parser import split_weighted_subprompts
 from ldm.invoke.generator.inpaint import infill_methods
@@ -40,7 +41,7 @@


 class InvokeAIWebServer:
-    def __init__(self, generate, gfpgan, codeformer, esrgan) -> None:
+    def __init__(self, generate: Generate, gfpgan, codeformer, esrgan) -> None:
         self.host = args.host
         self.port = args.port

@@ -1092,8 +1093,10 @@ def image_done(image, seed, first_seed, attention_maps_image=None):
             self.socketio.emit("progressUpdate", progress.to_formatted_dict())
             eventlet.sleep(0)

-            attention_maps_image_base64_url, tokens = (None, None) if attention_maps_image is None \
-                else image_to_dataURL(attention_maps_image), get_tokens_for_prompt(generation_parameters["prompt"])
+            parsed_prompt, _ = get_prompt_structure(generation_parameters["prompt"])
+            tokens = get_tokens_for_prompt(self.generate.model, parsed_prompt)
+            attention_maps_image_base64_url = None if attention_maps_image is None \
+                else image_to_dataURL(attention_maps_image)

             self.socketio.emit(
                 "generationResult",
@@ -1108,7 +1111,7 @@ def image_done(image, seed, first_seed, attention_maps_image=None):
                     "boundingBox": original_bounding_box,
                     "generationMode": generation_parameters["generation_mode"],
                     "attentionMaps": attention_maps_image_base64_url,
-                    "tokens": tokens
+                    "tokens": tokens,
                 },
             )
             eventlet.sleep(0)
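
For context, a minimal sketch of the flow the new image_done() code implements, assuming the imports shown in the diff above. Here image_to_data_url is only a stand-in approximation of the web server's existing image_to_dataURL helper, and build_generation_extras is a hypothetical wrapper, not a function added by this commit.

import base64
from io import BytesIO

from PIL import Image

from ldm.generate import Generate
from ldm.invoke.conditioning import get_prompt_structure, get_tokens_for_prompt


def image_to_data_url(image: Image.Image, image_format: str = "PNG") -> str:
    # Approximation of the web server's image_to_dataURL helper: encode a PIL image
    # as a base64 data URL so it can be embedded directly in the socket.io payload.
    buffer = BytesIO()
    image.save(buffer, format=image_format)
    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    return f"data:image/{image_format.lower()};base64,{encoded}"


def build_generation_extras(generate: Generate, prompt_string: str, attention_maps_image=None) -> dict:
    # Hypothetical helper mirroring the new image_done() logic: parse the prompt once,
    # tokenize the positive half against the loaded model, and only encode the
    # attention maps image when the generator actually produced one.
    parsed_prompt, _ = get_prompt_structure(prompt_string)
    tokens = get_tokens_for_prompt(generate.model, parsed_prompt)
    attention_maps_url = None if attention_maps_image is None else image_to_data_url(attention_maps_image)
    return {"attentionMaps": attention_maps_url, "tokens": tokens}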

ldm/generate.py

Lines changed: 1 addition & 1 deletion
@@ -485,7 +485,7 @@ def process_image(image,seed):
                 'extractor':self.safety_feature_extractor
             } if self.safety_checker else None

-            results, attention_maps_images = generator.generate(
+            results = generator.generate(
                 prompt,
                 iterations=iterations,
                 seed=self.seed,

ldm/invoke/CLI.py

Lines changed: 25 additions & 24 deletions
@@ -8,6 +8,7 @@
 import traceback
 import yaml

+from ldm.generate import Generate
 from ldm.invoke.globals import Globals
 from ldm.invoke.prompt_parser import PromptParser
 from ldm.invoke.readline import get_completer, Completer
@@ -27,7 +28,7 @@ def main():
     """Initialize command-line parsers and the diffusion model"""
     global infile
     print('* Initializing, be patient...')
-
+
     opt = Args()
     args = opt.parse_args()
     if not args:
@@ -47,7 +48,7 @@ def main():
     # alert - setting globals here
     Globals.root = os.path.expanduser(args.root_dir or os.environ.get('INVOKEAI_ROOT') or os.path.abspath('.'))
     Globals.try_patchmatch = args.patchmatch
-
+
     print(f'>> InvokeAI runtime directory is "{Globals.root}"')

     # loading here to avoid long delays on startup
@@ -281,7 +282,7 @@ def main_loop(gen, opt):
         prefix = file_writer.unique_prefix()
         step_callback = make_step_callback(gen, opt, prefix) if opt.save_intermediates > 0 else None

-        def image_writer(image, seed, upscaled=False, first_seed=None, use_prefix=None, prompt_in=None):
+        def image_writer(image, seed, upscaled=False, first_seed=None, use_prefix=None, prompt_in=None, attention_maps_image=None):
             # note the seed is the seed of the current image
             # the first_seed is the original seed that noise is added to
             # when the -v switch is used to generate variations
@@ -341,8 +342,8 @@ def image_writer(image, seed, upscaled=False, first_seed=None, use_prefix=None,
                         filename,
                         tool,
                         formatted_dream_prompt,
-                    )
-
+                    )
+
                 if (not postprocessed) or opt.save_original:
                     # only append to results if we didn't overwrite an earlier output
                     results.append([path, formatted_dream_prompt])
@@ -432,7 +433,7 @@ def do_command(command:str, gen, opt:Args, completer) -> tuple:
         add_embedding_terms(gen, completer)
         completer.add_history(command)
         operation = None
-
+
     elif command.startswith('!models'):
         gen.model_cache.print_models()
         completer.add_history(command)
@@ -533,7 +534,7 @@ def add_weights_to_config(model_path:str, gen, opt, completer):

     completer.complete_extensions(('.yaml','.yml'))
     completer.linebuffer = 'configs/stable-diffusion/v1-inference.yaml'
-
+
     done = False
     while not done:
         new_config['config'] = input('Configuration file for this model: ')
@@ -564,7 +565,7 @@ def add_weights_to_config(model_path:str, gen, opt, completer):
             print('** Please enter a valid integer between 64 and 2048')

     make_default = input('Make this the default model? [n] ') in ('y','Y')
-
+
     if write_config_file(opt.conf, gen, model_name, new_config, make_default=make_default):
         completer.add_model(model_name)

@@ -577,14 +578,14 @@ def del_config(model_name:str, gen, opt, completer):
     gen.model_cache.commit(opt.conf)
     print(f'** {model_name} deleted')
     completer.del_model(model_name)
-
+
 def edit_config(model_name:str, gen, opt, completer):
     config = gen.model_cache.config
-
+
     if model_name not in config:
         print(f'** Unknown model {model_name}')
         return
-
+
     print(f'\n>> Editing model {model_name} from configuration file {opt.conf}')

     conf = config[model_name]
@@ -597,10 +598,10 @@ def edit_config(model_name:str, gen, opt, completer):
     make_default = input('Make this the default model? [n] ') in ('y','Y')
     completer.complete_extensions(None)
     write_config_file(opt.conf, gen, model_name, new_config, clobber=True, make_default=make_default)
-
+
 def write_config_file(conf_path, gen, model_name, new_config, clobber=False, make_default=False):
     current_model = gen.model_name
-
+
     op = 'modify' if clobber else 'import'
     print('\n>> New configuration:')
     if make_default:
@@ -623,7 +624,7 @@ def write_config_file(conf_path, gen, model_name, new_config, clobber=False, mak
         gen.model_cache.set_default_model(model_name)

     gen.model_cache.commit(conf_path)
-
+
     do_switch = input(f'Keep model loaded? [y]')
     if len(do_switch)==0 or do_switch[0] in ('y','Y'):
         pass
@@ -653,7 +654,7 @@ def do_postprocess (gen, opt, callback):
         opt.prompt = opt.new_prompt
     else:
         opt.prompt = None
-
+
     if os.path.dirname(file_path) == '': #basename given
         file_path = os.path.join(opt.outdir,file_path)

@@ -718,7 +719,7 @@ def add_postprocessing_to_metadata(opt,original_file,new_file,tool,command):
     )
     meta['image']['postprocessing'] = pp
     write_metadata(new_file,meta)
-
+
 def prepare_image_metadata(
     opt,
     prefix,
@@ -789,28 +790,28 @@ def get_next_command(infile=None) -> str: # command string
         print(f'#{command}')
     return command

-def invoke_ai_web_server_loop(gen, gfpgan, codeformer, esrgan):
+def invoke_ai_web_server_loop(gen: Generate, gfpgan, codeformer, esrgan):
     print('\n* --web was specified, starting web server...')
     from backend.invoke_ai_web_server import InvokeAIWebServer
     # Change working directory to the stable-diffusion directory
     os.chdir(
         os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
     )
-
+
     invoke_ai_web_server = InvokeAIWebServer(generate=gen, gfpgan=gfpgan, codeformer=codeformer, esrgan=esrgan)

     try:
         invoke_ai_web_server.run()
     except KeyboardInterrupt:
         pass
-
+
 def add_embedding_terms(gen,completer):
     '''
     Called after setting the model, updates the autocompleter with
     any terms loaded by the embedding manager.
     '''
     completer.add_embedding_terms(gen.model.embedding_manager.list_terms())
-
+
 def split_variations(variations_string) -> list:
     # shotgun parsing, woo
     parts = []
@@ -867,15 +868,15 @@ def callback(img, step):
         image = gen.sample_to_image(img)
         image.save(filename,'PNG')
     return callback
-
+
 def retrieve_dream_command(opt,command,completer):
     '''
     Given a full or partial path to a previously-generated image file,
     will retrieve and format the dream command used to generate the image,
     and pop it into the readline buffer (linux, Mac), or print out a comment
     for cut-and-paste (windows)

-    Given a wildcard path to a folder with image png files,
+    Given a wildcard path to a folder with image png files,
     will retrieve and format the dream command used to generate the images,
     and save them to a file commands.txt for further processing
     '''
@@ -911,7 +912,7 @@ def write_commands(opt, file_path:str, outfilepath:str):
     except ValueError:
         print(f'## "{basename}": unacceptable pattern')
         return
-
+
     commands = []
     cmd = None
     for path in paths:
@@ -940,7 +941,7 @@ def emergency_model_reconfigure():
     print(' After reconfiguration is done, please relaunch invoke.py. ')
     print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
     print('configure_invokeai is launching....\n')
-
+
     sys.argv = ['configure_invokeai','--interactive']
     import configure_invokeai
     configure_invokeai.main()
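
Aside from whitespace cleanup, the functional change in CLI.py is that image_writer() now accepts an attention_maps_image keyword, keeping it compatible with the generator's new callback invocation. Below is a minimal sketch of a callback written against that signature; saving the map to disk is purely illustrative and not something this commit does.

import os


def save_attention_maps_callback(image, seed, upscaled=False, first_seed=None,
                                 use_prefix=None, prompt_in=None, attention_maps_image=None):
    # Same keyword signature as the updated image_writer() above. The new
    # attention_maps_image argument is a PIL image (or None) supplied per result.
    if attention_maps_image is not None:
        outdir = 'outputs/attention_maps'  # illustrative path, not from the commit
        os.makedirs(outdir, exist_ok=True)
        attention_maps_image.save(os.path.join(outdir, f'{seed}.attention.png'))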

ldm/invoke/conditioning.py

Lines changed: 24 additions & 13 deletions
@@ -19,25 +19,33 @@


 def get_uc_and_c_and_ec(prompt_string, model, log_tokens=False, skip_normalize_legacy_blend=False):
-
-    prompt, negative_prompt = get_prompt_structure(prompt_string, skip_normalize_legacy_blend=skip_normalize_legacy_blend)
+    prompt, negative_prompt = get_prompt_structure(prompt_string,
+                                                   skip_normalize_legacy_blend=skip_normalize_legacy_blend)
     conditioning = _get_conditioning_for_prompt(prompt, negative_prompt, model, log_tokens)

     return conditioning

-def get_prompt_structure(prompt_string, skip_normalize_legacy_blend: bool=False) -> (Union[FlattenedPrompt, Blend], FlattenedPrompt):
+
+def get_prompt_structure(prompt_string, skip_normalize_legacy_blend: bool = False) -> (
+        Union[FlattenedPrompt, Blend], FlattenedPrompt):
     """
     parse the passed-in prompt string and return tuple (positive_prompt, negative_prompt)
     """
-    prompt, negative_prompt = _parse_prompt_string(prompt_string, skip_normalize_legacy_blend=skip_normalize_legacy_blend)
+    prompt, negative_prompt = _parse_prompt_string(prompt_string,
+                                                   skip_normalize_legacy_blend=skip_normalize_legacy_blend)
     return prompt, negative_prompt

+
 def get_tokens_for_prompt(model, parsed_prompt: FlattenedPrompt) -> [str]:
-    text_fragments = [(x.text if x is Fragment else x.original.text if x is CrossAttentionControlSubstitute else str(x))
+    text_fragments = [x.text if type(x) is Fragment else
+                      (" ".join([f.text for f in x.original]) if type(x) is CrossAttentionControlSubstitute else
+                       str(x))
                       for x in parsed_prompt.children]
-    tokens = model.cond_stage_model.tokenizer.tokenize(text_fragments)
+    text = " ".join(text_fragments)
+    tokens = model.cond_stage_model.tokenizer.tokenize(text)
     return tokens

+
 def _parse_prompt_string(prompt_string_uncleaned, skip_normalize_legacy_blend=False) -> Union[FlattenedPrompt, Blend]:
     # Extract Unconditioned Words From Prompt
     unconditioned_words = ''
@@ -67,6 +75,7 @@ def _parse_prompt_string(prompt_string_uncleaned, skip_normalize_legacy_blend=Fa
     parsed_negative_prompt: FlattenedPrompt = pp.parse_conjunction(unconditioned_words).prompts[0]
     return parsed_prompt, parsed_negative_prompt

+
 def _get_conditioning_for_prompt(parsed_prompt: Union[Blend, FlattenedPrompt], parsed_negative_prompt: FlattenedPrompt,
                                  model, log_tokens=False) \
     -> tuple[torch.Tensor, torch.Tensor, InvokeAIDiffuserComponent.ExtraConditioningInfo]:
@@ -102,7 +111,8 @@ def _get_conditioning_for_prompt(parsed_prompt: Union[Blend, FlattenedPrompt], p
         # hybrid conditioning is in play
         unconditioning, conditioning = _flatten_hybrid_conditioning(unconditioning, conditioning)
         if cac_args is not None:
-            print(">> Hybrid conditioning cannot currently be combined with cross attention control. Cross attention control will be ignored.")
+            print(
+                ">> Hybrid conditioning cannot currently be combined with cross attention control. Cross attention control will be ignored.")
             cac_args = None

     return (
@@ -112,8 +122,7 @@ def _get_conditioning_for_prompt(parsed_prompt: Union[Blend, FlattenedPrompt], p
     )


-
-def _get_conditioning_for_cross_attention_control(model, prompt: FlattenedPrompt, log_tokens: bool=True):
+def _get_conditioning_for_cross_attention_control(model, prompt: FlattenedPrompt, log_tokens: bool = True):
     original_prompt = FlattenedPrompt()
     edited_prompt = FlattenedPrompt()
     # for name, a0, a1, b0, b1 in edit_opcodes: only name == 'equal' is currently parsed
@@ -185,7 +194,6 @@ def _get_conditioning_for_cross_attention_control(model, prompt: FlattenedPrompt
     return conditioning, cac_args


-
 def _get_conditioning_for_blend(model, blend: Blend, log_tokens: bool = False):
     embeddings_to_blend = None
     for i, flattened_prompt in enumerate(blend.prompts):
@@ -201,7 +209,8 @@ def _get_conditioning_for_blend(model, blend: Blend, log_tokens: bool = False):
     return conditioning


-def _get_embeddings_and_tokens_for_prompt(model, flattened_prompt: FlattenedPrompt, log_tokens: bool=False, log_display_label: str=None):
+def _get_embeddings_and_tokens_for_prompt(model, flattened_prompt: FlattenedPrompt, log_tokens: bool = False,
+                                          log_display_label: str = None):
     if type(flattened_prompt) is not FlattenedPrompt:
         raise Exception(f"embeddings can only be made from FlattenedPrompts, got {type(flattened_prompt)} instead")
     fragments = [x.text for x in flattened_prompt.children]
@@ -213,11 +222,13 @@ def _get_embeddings_and_tokens_for_prompt(model, flattened_prompt: FlattenedProm

     return embeddings, tokens

+
 def _get_tokens_length(model, fragments: list[Fragment]):
     fragment_texts = [x.text for x in fragments]
     tokens = model.cond_stage_model.get_tokens(fragment_texts, include_start_and_end_markers=False)
     return sum([len(x) for x in tokens])

+
 def _flatten_hybrid_conditioning(uncond, cond):
     '''
     This handles the choice between a conditional conditioning
@@ -244,7 +255,7 @@ def log_tokenization(text, model, display_label=None):
     # but for readability it has been replaced with ' '
     """

-    tokens = model.cond_stage_model.tokenizer.tokenize(text)
+    tokens = model.cond_stage_model.tokenizer.tokenize(text)
     tokenized = ""
     discarded = ""
     usedTokens = 0
@@ -261,5 +272,5 @@ def log_tokenization(text, model, display_label=None):
     print(f"\n>> Tokens {display_label or ''} ({usedTokens}):\n{tokenized}\x1b[0m")
     if discarded != "":
         print(
-            f">> Tokens Discarded ({totalTokens-usedTokens}):\n{discarded}\x1b[0m"
+            f">> Tokens Discarded ({totalTokens - usedTokens}):\n{discarded}\x1b[0m"
         )
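
To illustrate what the revised get_tokens_for_prompt() now does with a parsed prompt, here is a minimal sketch of the fragment-flattening step on its own, assuming Fragment, CrossAttentionControlSubstitute, and FlattenedPrompt are importable from ldm.invoke.prompt_parser as the surrounding code assumes; the helper name is hypothetical.

from ldm.invoke.prompt_parser import (CrossAttentionControlSubstitute, Fragment,
                                      FlattenedPrompt)


def flatten_prompt_text(parsed_prompt: FlattenedPrompt) -> str:
    # Mirror of the new fragment handling: plain fragments contribute their text,
    # cross-attention-control substitutions contribute their *original* fragments'
    # text joined with spaces, and anything else falls back to str().
    text_fragments = []
    for child in parsed_prompt.children:
        if type(child) is Fragment:
            text_fragments.append(child.text)
        elif type(child) is CrossAttentionControlSubstitute:
            text_fragments.append(" ".join(f.text for f in child.original))
        else:
            text_fragments.append(str(child))
    # A single joined string is what now gets handed to the CLIP tokenizer,
    # rather than a list of fragments.
    return " ".join(text_fragments)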

ldm/invoke/generator/base.py

Lines changed: 2 additions & 2 deletions
@@ -103,11 +103,11 @@ def generate(self,prompt,init_image,width,height,sampler, iterations=1,seed=None
             results.append([image, seed])

             if image_callback is not None:
-                image_callback(image, seed, first_seed=first_seed)
+                image_callback(image, seed, first_seed=first_seed, attention_maps_image=attention_maps_images[-1])

             seed = self.new_seed()

-        return results, attention_maps_images
+        return results

     def sample_to_image(self,samples)->Image.Image:
         """
