|
13 | 13 | # limitations under the License. |
14 | 14 | """Configuration base class and utilities.""" |
15 | 15 |
|
16 | | -import copy |
17 | | -import json |
18 | 16 | import os |
19 | | -import warnings |
20 | 17 | from dataclasses import dataclass |
21 | 18 | from pathlib import Path |
22 | 19 | from typing import Any |
|
47 | 44 | ) |
48 | 45 | from .training_args import ParallelMode |
49 | 46 | from .utils import ( |
50 | | - MODEL_CARD_NAME, |
51 | | - cached_file, |
52 | 47 | is_datasets_available, |
53 | 48 | is_tokenizers_available, |
54 | 49 | is_torch_available, |
|
76 | 71 | logger = logging.get_logger(__name__) |
77 | 72 |
|
78 | 73 |
|
79 | | -class ModelCard: |
80 | | - r""" |
81 | | - Structured Model Card class. Store model card as well as methods for loading/downloading/saving model cards. |
82 | | -
|
83 | | - Please read the following paper for details and explanation on the sections: "Model Cards for Model Reporting" by |
84 | | - Margaret Mitchell, Simone Wu, Andrew Zaldivar, Parker Barnes, Lucy Vasserman, Ben Hutchinson, Elena Spitzer, |
85 | | - Inioluwa Deborah Raji and Timnit Gebru for the proposal behind model cards. Link: https://huggingface.co/papers/1810.03993 |
86 | | -
|
87 | | - Note: A model card can be loaded and saved to disk. |
88 | | - """ |
89 | | - |
90 | | - def __init__(self, **kwargs): |
91 | | - warnings.warn( |
92 | | - "The class `ModelCard` is deprecated and will be removed in version 5 of Transformers", FutureWarning |
93 | | - ) |
94 | | - # Recommended attributes from https://huggingface.co/papers/1810.03993 (see papers) |
95 | | - self.model_details = kwargs.pop("model_details", {}) |
96 | | - self.intended_use = kwargs.pop("intended_use", {}) |
97 | | - self.factors = kwargs.pop("factors", {}) |
98 | | - self.metrics = kwargs.pop("metrics", {}) |
99 | | - self.evaluation_data = kwargs.pop("evaluation_data", {}) |
100 | | - self.training_data = kwargs.pop("training_data", {}) |
101 | | - self.quantitative_analyses = kwargs.pop("quantitative_analyses", {}) |
102 | | - self.ethical_considerations = kwargs.pop("ethical_considerations", {}) |
103 | | - self.caveats_and_recommendations = kwargs.pop("caveats_and_recommendations", {}) |
104 | | - |
105 | | - # Open additional attributes |
106 | | - for key, value in kwargs.items(): |
107 | | - try: |
108 | | - setattr(self, key, value) |
109 | | - except AttributeError as err: |
110 | | - logger.error(f"Can't set {key} with value {value} for {self}") |
111 | | - raise err |
112 | | - |
113 | | - def save_pretrained(self, save_directory_or_file): |
114 | | - """Save a model card object to the directory or file `save_directory_or_file`.""" |
115 | | - if os.path.isdir(save_directory_or_file): |
116 | | - # If we save using the predefined names, we can load using `from_pretrained` |
117 | | - output_model_card_file = os.path.join(save_directory_or_file, MODEL_CARD_NAME) |
118 | | - else: |
119 | | - output_model_card_file = save_directory_or_file |
120 | | - |
121 | | - self.to_json_file(output_model_card_file) |
122 | | - logger.info(f"Model card saved in {output_model_card_file}") |
123 | | - |
124 | | - @classmethod |
125 | | - def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): |
126 | | - r""" |
127 | | - Instantiate a [`ModelCard`] from a pre-trained model model card. |
128 | | -
|
129 | | - Parameters: |
130 | | - pretrained_model_name_or_path: either: |
131 | | -
|
132 | | - - a string, the *model id* of a pretrained model card hosted inside a model repo on huggingface.co. |
133 | | - - a path to a *directory* containing a model card file saved using the [`~ModelCard.save_pretrained`] |
134 | | - method, e.g.: `./my_model_directory/`. |
135 | | - - a path or url to a saved model card JSON *file*, e.g.: `./my_model_directory/modelcard.json`. |
136 | | -
|
137 | | - cache_dir: (*optional*) string: |
138 | | - Path to a directory in which a downloaded pre-trained model card should be cached if the standard cache |
139 | | - should not be used. |
140 | | -
|
141 | | - kwargs: (*optional*) dict: key/value pairs with which to update the ModelCard object after loading. |
142 | | -
|
143 | | - - The values in kwargs of any keys which are model card attributes will be used to override the loaded |
144 | | - values. |
145 | | - - Behavior concerning key/value pairs whose keys are *not* model card attributes is controlled by the |
146 | | - *return_unused_kwargs* keyword parameter. |
147 | | -
|
148 | | - proxies: (*optional*) dict, default None: |
149 | | - A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', |
150 | | - 'http://hostname': 'foo.bar:4012'}. The proxies are used on each request. |
151 | | -
|
152 | | - return_unused_kwargs: (*optional*) bool: |
153 | | -
|
154 | | - - If False, then this function returns just the final model card object. |
155 | | - - If True, then this functions returns a tuple *(model card, unused_kwargs)* where *unused_kwargs* is a |
156 | | - dictionary consisting of the key/value pairs whose keys are not model card attributes: ie the part of |
157 | | - kwargs which has not been used to update *ModelCard* and is otherwise ignored. |
158 | | -
|
159 | | - Examples: |
160 | | -
|
161 | | - ```python |
162 | | - # Download model card from huggingface.co and cache. |
163 | | - modelcard = ModelCard.from_pretrained("google-bert/bert-base-uncased") |
164 | | - # Model card was saved using *save_pretrained('./test/saved_model/')* |
165 | | - modelcard = ModelCard.from_pretrained("./test/saved_model/") |
166 | | - modelcard = ModelCard.from_pretrained("./test/saved_model/modelcard.json") |
167 | | - modelcard = ModelCard.from_pretrained("google-bert/bert-base-uncased", output_attentions=True, foo=False) |
168 | | - ```""" |
169 | | - cache_dir = kwargs.pop("cache_dir", None) |
170 | | - proxies = kwargs.pop("proxies", None) |
171 | | - return_unused_kwargs = kwargs.pop("return_unused_kwargs", False) |
172 | | - from_pipeline = kwargs.pop("_from_pipeline", None) |
173 | | - |
174 | | - user_agent = {"file_type": "model_card"} |
175 | | - if from_pipeline is not None: |
176 | | - user_agent["using_pipeline"] = from_pipeline |
177 | | - |
178 | | - is_local = os.path.isdir(pretrained_model_name_or_path) |
179 | | - if os.path.isfile(pretrained_model_name_or_path): |
180 | | - resolved_model_card_file = pretrained_model_name_or_path |
181 | | - is_local = True |
182 | | - else: |
183 | | - try: |
184 | | - # Load from URL or cache if already cached |
185 | | - resolved_model_card_file = cached_file( |
186 | | - pretrained_model_name_or_path, |
187 | | - filename=MODEL_CARD_NAME, |
188 | | - cache_dir=cache_dir, |
189 | | - proxies=proxies, |
190 | | - user_agent=user_agent, |
191 | | - ) |
192 | | - if is_local: |
193 | | - logger.info(f"loading model card file {resolved_model_card_file}") |
194 | | - else: |
195 | | - logger.info(f"loading model card file {MODEL_CARD_NAME} from cache at {resolved_model_card_file}") |
196 | | - # Load model card |
197 | | - modelcard = cls.from_json_file(resolved_model_card_file) |
198 | | - |
199 | | - except (OSError, json.JSONDecodeError): |
200 | | - # We fall back on creating an empty model card |
201 | | - modelcard = cls() |
202 | | - |
203 | | - # Update model card with kwargs if needed |
204 | | - to_remove = [] |
205 | | - for key, value in kwargs.items(): |
206 | | - if hasattr(modelcard, key): |
207 | | - setattr(modelcard, key, value) |
208 | | - to_remove.append(key) |
209 | | - for key in to_remove: |
210 | | - kwargs.pop(key, None) |
211 | | - |
212 | | - logger.info(f"Model card: {modelcard}") |
213 | | - if return_unused_kwargs: |
214 | | - return modelcard, kwargs |
215 | | - else: |
216 | | - return modelcard |
217 | | - |
218 | | - @classmethod |
219 | | - def from_dict(cls, json_object): |
220 | | - """Constructs a `ModelCard` from a Python dictionary of parameters.""" |
221 | | - return cls(**json_object) |
222 | | - |
223 | | - @classmethod |
224 | | - def from_json_file(cls, json_file): |
225 | | - """Constructs a `ModelCard` from a json file of parameters.""" |
226 | | - with open(json_file, encoding="utf-8") as reader: |
227 | | - text = reader.read() |
228 | | - dict_obj = json.loads(text) |
229 | | - return cls(**dict_obj) |
230 | | - |
231 | | - def __eq__(self, other): |
232 | | - return self.__dict__ == other.__dict__ |
233 | | - |
234 | | - def __repr__(self): |
235 | | - return str(self.to_json_string()) |
236 | | - |
237 | | - def to_dict(self): |
238 | | - """Serializes this instance to a Python dictionary.""" |
239 | | - output = copy.deepcopy(self.__dict__) |
240 | | - return output |
241 | | - |
242 | | - def to_json_string(self): |
243 | | - """Serializes this instance to a JSON string.""" |
244 | | - return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" |
245 | | - |
246 | | - def to_json_file(self, json_file_path): |
247 | | - """Save this instance to a json file.""" |
248 | | - with open(json_file_path, "w", encoding="utf-8") as writer: |
249 | | - writer.write(self.to_json_string()) |
250 | | - |
251 | | - |
252 | 74 | AUTOGENERATED_TRAINER_COMMENT = """ |
253 | 75 | <!-- This model card has been generated automatically according to the information the Trainer had access to. You |
254 | 76 | should probably proofread and complete it, then remove this comment. --> |
|
0 commit comments