[Whisper] fix docstrings typo (#35319)

eustlb · web-flow · commit 75be5a0a5b18 · 2024-12-18T16:38:19.000+01:00
Fix docstring typos and cross-reference link markup in the Whisper `generate` docstring.
diff --git a/src/transformers/models/whisper/generation_whisper.py b/src/transformers/models/whisper/generation_whisper.py
@@ -382,7 +382,7 @@ def generate(
                 the soundfile library (`pip install soundfile`). To prepare the array into `input_features`, the
                 [`AutoFeatureExtractor`] should be used for extracting the mel features, padding and conversion into a
                 tensor of type `torch.FloatTensor`. See [`~WhisperFeatureExtractor.__call__`] for details.
-            generation_config (`~generation.GenerationConfig`, *optional*):
+            generation_config ([`~generation.GenerationConfig`], *optional*):
                 The generation configuration to be used as base parametrization for the generation call. `**kwargs`
                 passed to generate matching the attributes of `generation_config` will override them. If
                 `generation_config` is not provided, the default will be used, which had the following loading
@@ -480,8 +480,8 @@ def generate(
                 `return_segments` is set True. In this case the generation outputs of each segment is added to each
                 segment.
             force_unique_generate_call (`bool`, *optional*):
-                Whether to force a unique call to the underlying GenerationMixin's generate method. This is useful for assisted decoding and testing purposes to ensure
-                that only one call to generate is made and therefore decoder input token ids and eos token ids are returned.
+                Whether to force a unique call to the underlying GenerationMixin's [`~generation.GenerationMixin.generate`] method. This is useful for assisted decoding and testing purposes to ensure
+                that only one call to [`~generation.GenerationMixin.generate`] is made and therefore decoder input token ids and eos token ids are returned.
             kwargs (`Dict[str, Any]`, *optional*):
                 Ad hoc parametrization of `generate_config` and/or additional model-specific kwargs that will be
                 forwarded to the `forward` function of the model. If the model is an encoder-decoder model, encoder
@@ -495,18 +495,18 @@ def generate(
                 - `torch.LongTensor` in all other cases, excluding the decoder input ids and end of sequence id.
 
                 The possible [`~utils.ModelOutput`] types are:
-                - [`~utils.GenerateEncoderDecoderOutput`]
-                - [`~utils.GenerateBeamEncoderDecoderOutput`]
+                - [`~generation.GenerateEncoderDecoderOutput`]
+                - [`~generation.GenerateBeamEncoderDecoderOutput`]
 
                 `segments` is a list of lists (one list per batch element) of `segment`.
                 A `segment` is a dictionary with keys `start`, `end`, `tokens`, `idxs`, and `result`.
                 - `start`: the start timestamp of the segment.
                 - `end`: the end timestamp of the segment.
                 - `tokens`: the tokens of the segment, excluding the decoder input ids and end of sequence id.
-                - `idxs`: the start (included) and end (excluded) indices of the `tokens` of the segment in the underlying call to GenerationMixin's `generate` (present in `result`).
-                - `result`: the result of the underlying call to GenerationMixin's `generate`.
+                - `idxs`: the start (included) and end (excluded) indices of the `tokens` of the segment in the underlying call to GenerationMixin's [`~generation.GenerationMixin.generate`] (present in `result`).
+                - `result`: the result of the underlying call to GenerationMixin's [`~generation.GenerationMixin.generate`].
 
-                When `return_timestamps=True`, `return_dict_in_generate=True` applies to each call of the underlying GenerationMixin's `generate`, with outputs stored in `result` of each `segment`.
+                When `return_timestamps=True`, `return_dict_in_generate=True` applies to each call of the underlying GenerationMixin's [`~generation.GenerationMixin.generate`], with outputs stored in `result` of each `segment`.
 
         Example:
 
@@ -543,7 +543,7 @@ def generate(
         ```
 
         - *Shortform transcription*: If passed mel input features are <= 30 seconds, there are two possibilities:
-            - `return_timestamps=False`: the whole audio will be transcribed with a single call to GenerationMixin's generate.
+            - `return_timestamps=False`: the whole audio will be transcribed with a single call to GenerationMixin's [`~generation.GenerationMixin.generate`].
             - `return_timestamps=True`: the audio will be transcribed using the same logic as long-form transcription.
 
         ```python