##### Copyright 2019 The TensorFlow Authors.


In [1]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# 분산 전략을 사용한 모델 저장 및 불러오기

<table class="tfo-notebook-buttons" align="left">
  <td><a target="_blank" href="https://www.tensorflow.org/tutorials/distribute/save_and_load"><img src="https://www.tensorflow.org/images/tf_logo_32px.png">TensorFlow.org에서 보기</a></td>
  <td><a target="_blank" href="https://colab.research.google.com/github/tensorflow/docs-l10n/blob/master/site/ko/tutorials/distribute/save_and_load.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png">Google Colab에서 실행하기</a></td>
  <td><a target="_blank" href="https://github.com/tensorflow/docs-l10n/blob/master/site/ko/tutorials/distribute/save_and_load.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png">GitHub에서 소스 보기</a></td>
  <td><a href="https://storage.googleapis.com/tensorflow_docs/docs-l10n/site/ko/tutorials/distribute/save_and_load.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png">노트북 다운로드하기</a></td>
</table>

## 개요

이 튜토리얼에서는 훈련 중 또는 훈련 후에 `tf.distribute.Strategy`를 사용하여 SavedModel 형식으로 모델을 저장하고 로드하는 방법을 보여줍니다. Keras 모델을 저장하고 로드하는 API에는 상위 수준(`tf.keras.Model.save` 및 `tf.keras.models.load_model`) 및 하위 수준(`tf.saved_model.save` 및 `tf.saved_model.load`)의 두 가지 종류가 있습니다.

SavedModel 및 직렬화에 대한 일반적인 내용은 [저장된 모델 가이드](../../guide/saved_model.ipynb) 및 [Keras 모델 직렬화 가이드](https://www.tensorflow.org/guide/keras/save_and_serialize)를 참조하세요. 간단한 예부터 시작하겠습니다.

주의: TensorFlow 모델은 코드이며 신뢰할 수 없는 코드에 주의하는 것이 중요합니다. 자세한 내용은 [TensorFlow 안전하게 사용하기](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md)를 참조하세요.


필요한 패키지 가져오기:

In [2]:
import tensorflow_datasets as tfds

import tensorflow as tf


2022-12-15 02:03:06.609082: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2022-12-15 02:03:06.609187: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


TensorFlow Datasets 및 `tf.data`를 사용하여 데이터를 로드 및 준비하고 `tf.distribute.MirroredStrategy`를 사용하여 모델을 생성합니다.

In [3]:
# Strategy shared by the helpers below: `get_data` reads its replica count to
# size the global batch, and `get_model` builds the model inside its scope.
mirrored_strategy = tf.distribute.MirroredStrategy()

def get_data():
  """Load MNIST from TensorFlow Datasets and return batched datasets.

  Images are cast to float32 and divided by 255. The global batch size is
  64 per replica multiplied by `mirrored_strategy.num_replicas_in_sync`.

  Returns:
    A `(train_dataset, eval_dataset)` tuple. The training dataset is cached,
    shuffled with a buffer of 10000 and batched; the evaluation dataset is
    only scaled and batched.
  """
  splits = tfds.load(name='mnist', as_supervised=True)

  shuffle_buffer = 10000
  per_replica_batch = 64
  global_batch = per_replica_batch * mirrored_strategy.num_replicas_in_sync

  def scale(image, label):
    # Cast to float32 and divide by 255; the label is passed through as-is.
    return tf.cast(image, tf.float32) / 255, label

  train_dataset = splits['train'].map(scale).cache()
  train_dataset = train_dataset.shuffle(shuffle_buffer).batch(global_batch)

  eval_dataset = splits['test'].map(scale).batch(global_batch)

  return train_dataset, eval_dataset

def get_model():
  """Build and compile a small convolutional classifier.

  The model is created and compiled inside `mirrored_strategy.scope()`. It
  stacks Conv2D(32, 3) -> MaxPooling2D -> Flatten -> Dense(64) -> Dense(10);
  the last layer has no activation, so the loss uses `from_logits=True`.

  Returns:
    The compiled `tf.keras.Sequential` model.
  """
  with mirrored_strategy.scope():
    layer_stack = [
        tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10),
    ]
    model = tf.keras.Sequential(layer_stack)

    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    adam = tf.keras.optimizers.Adam()
    accuracy = tf.metrics.SparseCategoricalAccuracy()
    model.compile(loss=loss_fn, optimizer=adam, metrics=[accuracy])

  return model

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


`tf.keras.Model.fit`을 사용하여 모델을 훈련시킵니다. 

In [4]:
# Build the compiled model and the datasets, then train for two epochs.
# `train_dataset` and `eval_dataset` are reused by later cells.
model = get_model()
train_dataset, eval_dataset = get_data()
model.fit(train_dataset, epochs=2)

INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


2022-12-15 02:03:13.997281: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:549] The `assert_cardinality` transformation is currently not handled by the auto-shard rewrite and will be removed.


Epoch 1/2


INFO:tensorflow:batch_all_reduce: 6 all-reduces with algorithm = nccl, num_packs = 1


INFO:tensorflow:batch_all_reduce: 6 all-reduces with algorithm = nccl, num_packs = 1


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:batch_all_reduce: 6 all-reduces with algorithm = nccl, num_packs = 1


INFO:tensorflow:batch_all_reduce: 6 all-reduces with algorithm = nccl, num_packs = 1


  1/235 [..............................] - ETA: 39:57 - loss: 2.3080 - sparse_categorical_accuracy: 0.0859

  8/235 [>.............................] - ETA: 1s - loss: 1.9091 - sparse_categorical_accuracy: 0.4790   

 15/235 [>.............................] - ETA: 1s - loss: 1.5285 - sparse_categorical_accuracy: 0.6125

 22/235 [=>............................] - ETA: 1s - loss: 1.2620 - sparse_categorical_accuracy: 0.6744

 29/235 [==>...........................] - ETA: 1s - loss: 1.0787 - sparse_categorical_accuracy: 0.7193

 36/235 [===>..........................] - ETA: 1s - loss: 0.9502 - sparse_categorical_accuracy: 0.7508

 43/235 [====>.........................] - ETA: 1s - loss: 0.8511 - sparse_categorical_accuracy: 0.7751

 50/235 [=====>........................] - ETA: 1s - loss: 0.7735 - sparse_categorical_accuracy: 0.7933





















































Epoch 2/2


  1/235 [..............................] - ETA: 8s - loss: 0.1099 - sparse_categorical_accuracy: 0.9727

  9/235 [>.............................] - ETA: 1s - loss: 0.1211 - sparse_categorical_accuracy: 0.9670

 17/235 [=>............................] - ETA: 1s - loss: 0.1223 - sparse_categorical_accuracy: 0.9646

 25/235 [==>...........................] - ETA: 1s - loss: 0.1185 - sparse_categorical_accuracy: 0.9664

 33/235 [===>..........................] - ETA: 1s - loss: 0.1179 - sparse_categorical_accuracy: 0.9664

 41/235 [====>.........................] - ETA: 1s - loss: 0.1173 - sparse_categorical_accuracy: 0.9660

 49/235 [=====>........................] - ETA: 1s - loss: 0.1153 - sparse_categorical_accuracy: 0.9664

















































<keras.callbacks.History at 0x7f264012cfd0>

## 모델 저장하고 불러오기

이제 작업할 간단한 모델이 생겼으므로 저장/로드 API를 살펴보겠습니다. 두 가지 종류의 API를 사용할 수 있습니다.

- 상위 수준(Keras): `Model.save` 및 `tf.keras.models.load_model`
- 하위 수준: `tf.saved_model.save` 및 `tf.saved_model.load`


### Keras API

다음은 Keras API를 사용하여 모델을 저장하고 로드하는 예입니다.

In [5]:
# Directory the Keras API saves into; later cells load from this same path.
keras_model_path = '/tmp/keras_save'
model.save(keras_model_path)



INFO:tensorflow:Assets written to: /tmp/keras_save/assets


INFO:tensorflow:Assets written to: /tmp/keras_save/assets


`tf.distribute.Strategy` 없이 모델 복원하기:

In [6]:
# Load outside of any strategy scope and continue training directly,
# without calling `compile` again.
restored_keras_model = tf.keras.models.load_model(keras_model_path)
restored_keras_model.fit(train_dataset, epochs=2)

Epoch 1/2


  1/235 [..............................] - ETA: 4:11 - loss: 0.0745 - sparse_categorical_accuracy: 0.9727

 16/235 [=>............................] - ETA: 0s - loss: 0.0735 - sparse_categorical_accuracy: 0.9807  

 32/235 [===>..........................] - ETA: 0s - loss: 0.0763 - sparse_categorical_accuracy: 0.9792

 48/235 [=====>........................] - ETA: 0s - loss: 0.0732 - sparse_categorical_accuracy: 0.9800



























Epoch 2/2


  1/235 [..............................] - ETA: 6s - loss: 0.0443 - sparse_categorical_accuracy: 0.9883

 15/235 [>.............................] - ETA: 0s - loss: 0.0596 - sparse_categorical_accuracy: 0.9831

 30/235 [==>...........................] - ETA: 0s - loss: 0.0584 - sparse_categorical_accuracy: 0.9833

 45/235 [====>.........................] - ETA: 0s - loss: 0.0531 - sparse_categorical_accuracy: 0.9840



























<keras.callbacks.History at 0x7f2614191a00>

모델을 복원한 후 `Model.compile`을 다시 호출할 필요 없이 모델에 대한 훈련을 계속할 수 있습니다. 저장하기 전에 이미 컴파일되었기 때문입니다. 모델은 TensorFlow의 표준 `SavedModel` proto 형식으로 저장됩니다. 자세한 내용은 [`SavedModel` 형식 가이드](../../guide/saved_model.ipynb)를 참조하세요.

이제 모델을 복원하고 `tf.distribute.Strategy`를 사용하여 훈련시킵니다.

In [7]:
# Load and train under a strategy that differs from the MirroredStrategy the
# model was created with: a single-device strategy pinned to the CPU.
another_strategy = tf.distribute.OneDeviceStrategy('/cpu:0')
with another_strategy.scope():
  restored_keras_model_ds = tf.keras.models.load_model(keras_model_path)
  restored_keras_model_ds.fit(train_dataset, epochs=2)

Epoch 1/2


2022-12-15 02:03:32.471904: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:549] The `assert_cardinality` transformation is currently not handled by the auto-shard rewrite and will be removed.
2022-12-15 02:03:32.534336: W tensorflow/core/framework/dataset.cc:769] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


  1/235 [..............................] - ETA: 2:35 - loss: 0.0768 - sparse_categorical_accuracy: 0.9766

  5/235 [..............................] - ETA: 3s - loss: 0.0921 - sparse_categorical_accuracy: 0.9742  

  9/235 [>.............................] - ETA: 3s - loss: 0.0943 - sparse_categorical_accuracy: 0.9718

 13/235 [>.............................] - ETA: 3s - loss: 0.0887 - sparse_categorical_accuracy: 0.9739

 17/235 [=>............................] - ETA: 3s - loss: 0.0866 - sparse_categorical_accuracy: 0.9731

 21/235 [=>............................] - ETA: 2s - loss: 0.0849 - sparse_categorical_accuracy: 0.9751

 26/235 [==>...........................] - ETA: 2s - loss: 0.0806 - sparse_categorical_accuracy: 0.9767

 31/235 [==>...........................] - ETA: 2s - loss: 0.0795 - sparse_categorical_accuracy: 0.9766

 35/235 [===>..........................] - ETA: 2s - loss: 0.0784 - sparse_categorical_accuracy: 0.9767

 39/235 [===>..........................] - ETA: 2s - loss: 0.0768 - sparse_categorical_accuracy: 0.9777

 43/235 [====>.........................] - ETA: 2s - loss: 0.0756 - sparse_categorical_accuracy: 0.9778

 48/235 [=====>........................] - ETA: 2s - loss: 0.0740 - sparse_categorical_accuracy: 0.9790

 53/235 [=====>........................] - ETA: 2s - loss: 0.0735 - sparse_categorical_accuracy: 0.9790























































































Epoch 2/2


  1/235 [..............................] - ETA: 9s - loss: 0.0440 - sparse_categorical_accuracy: 0.9766

  6/235 [..............................] - ETA: 2s - loss: 0.0483 - sparse_categorical_accuracy: 0.9844

 10/235 [>.............................] - ETA: 2s - loss: 0.0487 - sparse_categorical_accuracy: 0.9828

 15/235 [>.............................] - ETA: 2s - loss: 0.0481 - sparse_categorical_accuracy: 0.9836

 20/235 [=>............................] - ETA: 2s - loss: 0.0509 - sparse_categorical_accuracy: 0.9834

 25/235 [==>...........................] - ETA: 2s - loss: 0.0494 - sparse_categorical_accuracy: 0.9844

 30/235 [==>...........................] - ETA: 2s - loss: 0.0483 - sparse_categorical_accuracy: 0.9852

 35/235 [===>..........................] - ETA: 2s - loss: 0.0500 - sparse_categorical_accuracy: 0.9845

 40/235 [====>.........................] - ETA: 2s - loss: 0.0511 - sparse_categorical_accuracy: 0.9844

 44/235 [====>.........................] - ETA: 2s - loss: 0.0508 - sparse_categorical_accuracy: 0.9846

 49/235 [=====>........................] - ETA: 2s - loss: 0.0505 - sparse_categorical_accuracy: 0.9849

 53/235 [=====>........................] - ETA: 2s - loss: 0.0507 - sparse_categorical_accuracy: 0.9848



















































































`Model.fit` 출력에서 볼 수 있듯이 로드는 `tf.distribute.Strategy`에서 예상대로 작동합니다. 여기에 사용된 전략은 저장하기 전에 사용한 전략과 동일하지 않아도 됩니다. 

### `tf.saved_model` API

하위 수준 API로 모델을 저장하는 것은 Keras API와 유사합니다.

In [8]:
model = get_model()  # get a fresh model
# Export directory for the lower-level save; later cells load from it.
saved_model_path = '/tmp/tf_save'
tf.saved_model.save(model, saved_model_path)



INFO:tensorflow:Assets written to: /tmp/tf_save/assets


INFO:tensorflow:Assets written to: /tmp/tf_save/assets


`tf.saved_model.load`를 사용하여 로드할 수 있습니다. 그러나 이것은 하위 수준 API(따라서 사용 사례의 범위가 더 넓음)이기 때문에 Keras 모델을 반환하지 않습니다. 대신 추론을 수행하는 데 사용할 수 있는 함수가 포함된 객체를 반환합니다. 예를 들면 다음과 같습니다.

In [9]:
# Key used to look up the inference function on the loaded object.
DEFAULT_FUNCTION_KEY = 'serving_default'
loaded = tf.saved_model.load(saved_model_path)
# `signatures` is indexed by key; select the function stored under the key above.
inference_func = loaded.signatures[DEFAULT_FUNCTION_KEY]

로드된 객체에는 각각 키와 연결된 여러 함수가 포함될 수 있습니다. `"serving_default"` 키는 저장된 Keras 모델의 추론 함수에 대한 기본 키입니다. 이 함수로 추론하려면 다음과 같이 합니다.

In [10]:
# Drop the labels so each element is just the image batch.
predict_dataset = eval_dataset.map(lambda image, label: image)
# Run the loaded inference function on one batch and print what it returns.
for batch in predict_dataset.take(1):
  print(inference_func(batch))

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


2022-12-15 02:03:40.597935: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


{'dense_3': <tf.Tensor: shape=(256, 10), dtype=float32, numpy=
array([[-0.07287998, -0.06065657,  0.03483895, ..., -0.18132585,
        -0.08910748,  0.06052908],
       [-0.31163222, -0.02226205, -0.13526598, ..., -0.04082797,
        -0.23755129,  0.04015619],
       [-0.26187348, -0.09974411, -0.18940257, ..., -0.17564824,
        -0.11286059, -0.00997314],
       ...,
       [-0.24347621, -0.09062086, -0.08512177, ..., -0.0498026 ,
        -0.02609731, -0.0913346 ],
       [-0.12913392, -0.0360139 , -0.04862705, ...,  0.04664   ,
        -0.02082852, -0.01882938],
       [-0.12633127, -0.14244772, -0.08439697, ..., -0.18966636,
        -0.21226291, -0.08730254]], dtype=float32)>}


또한 분산 방식으로 로드하고 추론을 수행할 수도 있습니다:

In [11]:
# Load the SavedModel under a new MirroredStrategy and run inference through it.
another_strategy = tf.distribute.MirroredStrategy()
with another_strategy.scope():
  loaded = tf.saved_model.load(saved_model_path)
  inference_func = loaded.signatures[DEFAULT_FUNCTION_KEY]

  # Wrap the label-free dataset so the strategy can feed it to `run`.
  dist_predict_dataset = another_strategy.experimental_distribute_dataset(
      predict_dataset)

  # Calling the function in a distributed manner
  for batch in dist_predict_dataset:
    result = another_strategy.run(inference_func, args=(batch,))
    print(result)
    # Only the first distributed batch is needed for the demonstration.
    break

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


2022-12-15 02:03:40.864456: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:549] The `assert_cardinality` transformation is currently not handled by the auto-shard rewrite and will be removed.






{'dense_3': PerReplica:{
  0: <tf.Tensor: shape=(64, 10), dtype=float32, numpy=
array([[-7.28799775e-02, -6.06565699e-02,  3.48389521e-02,
         1.01685286e-01,  1.32346496e-01,  3.62287462e-02,
        -4.74997982e-02, -1.81325853e-01, -8.91074836e-02,
         6.05290756e-02],
       [-3.11632216e-01, -2.22620517e-02, -1.35265976e-01,
         2.21318334e-01,  1.89438641e-01,  2.96817273e-02,
        -1.30453795e-01, -4.08279747e-02, -2.37551287e-01,
         4.01561856e-02],
       [-2.61873484e-01, -9.97441113e-02, -1.89402565e-01,
         3.95955741e-02, -7.23269284e-02, -9.36339349e-02,
        -1.40353218e-02, -1.75648242e-01, -1.12860590e-01,
        -9.97313857e-03],
       [ 7.93537498e-03, -8.39788094e-02, -5.49430102e-02,
         5.06623089e-02, -8.49889666e-02, -6.26260862e-02,
        -1.38619915e-01, -1.57361925e-01, -6.83496967e-02,
        -4.66652215e-02],
       [-1.02909938e-01, -2.37993933e-02, -1.04971468e-01,
         4.83149216e-02,  1.14822879e-01, -5.8794

2022-12-15 02:03:41.411701: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


복원된 함수를 호출하는 것은 저장된 모델(`tf.keras.Model.predict`)에 대한 순방향 전달일 뿐입니다. 로드된 함수를 계속 훈련하려면 어떻게 해야 할까요? 또는 로드된 함수를 더 큰 모델에 포함해야 한다면 어떻게 해야 할까요? 일반적으로 이를 해결하는 방법은 이 로드된 객체를 Keras 레이어로 래핑하는 것입니다. 다행스럽게도 [TF Hub](https://www.tensorflow.org/hub)에는 다음과 같이 이 목적을 위한 [`hub.KerasLayer`](https://github.com/tensorflow/hub/blob/master/tensorflow_hub/keras_layer.py)가 있습니다.

In [12]:
import tensorflow_hub as hub

def build_model(loaded):
  """Wrap a loaded SavedModel object in a trainable Keras model.

  Args:
    loaded: Object handed to `hub.KerasLayer`; in this notebook it is the
      result of `tf.saved_model.load`.

  Returns:
    A `tf.keras.Model` whose input is a `(28, 28, 1)` tensor named `input_x`
    and whose output is the output of the wrapping `hub.KerasLayer`.
  """
  inputs = tf.keras.layers.Input(shape=(28, 28, 1), name='input_x')
  # Wrap what's loaded in a KerasLayer so it can be trained as part of a model.
  wrapped_layer = hub.KerasLayer(loaded, trainable=True)
  outputs = wrapped_layer(inputs)
  return tf.keras.Model(inputs, outputs)

# Load the SavedModel under a new strategy, wrap it with `build_model`, and
# train the resulting Keras model.
another_strategy = tf.distribute.MirroredStrategy()
with another_strategy.scope():
  loaded = tf.saved_model.load(saved_model_path)
  model = build_model(loaded)

  # The wrapped model is a new Keras model, so it is compiled here before fit.
  model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                optimizer=tf.keras.optimizers.Adam(),
                metrics=[tf.metrics.SparseCategoricalAccuracy()])
  model.fit(train_dataset, epochs=2)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')






2022-12-15 02:03:42.057240: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:549] The `assert_cardinality` transformation is currently not handled by the auto-shard rewrite and will be removed.


Epoch 1/2


INFO:tensorflow:batch_all_reduce: 6 all-reduces with algorithm = nccl, num_packs = 1


INFO:tensorflow:batch_all_reduce: 6 all-reduces with algorithm = nccl, num_packs = 1


INFO:tensorflow:batch_all_reduce: 6 all-reduces with algorithm = nccl, num_packs = 1


INFO:tensorflow:batch_all_reduce: 6 all-reduces with algorithm = nccl, num_packs = 1


  1/235 [..............................] - ETA: 20:41 - loss: 2.3070 - sparse_categorical_accuracy: 0.0977

  9/235 [>.............................] - ETA: 1s - loss: 1.8309 - sparse_categorical_accuracy: 0.5299   

 16/235 [=>............................] - ETA: 1s - loss: 1.4895 - sparse_categorical_accuracy: 0.6304

 24/235 [==>...........................] - ETA: 1s - loss: 1.2235 - sparse_categorical_accuracy: 0.6867

 32/235 [===>..........................] - ETA: 1s - loss: 1.0357 - sparse_categorical_accuracy: 0.7295

 40/235 [====>.........................] - ETA: 1s - loss: 0.9117 - sparse_categorical_accuracy: 0.7580

 48/235 [=====>........................] - ETA: 1s - loss: 0.8195 - sparse_categorical_accuracy: 0.7800

















































Epoch 2/2


  1/235 [..............................] - ETA: 8s - loss: 0.0723 - sparse_categorical_accuracy: 0.9844

  9/235 [>.............................] - ETA: 1s - loss: 0.1027 - sparse_categorical_accuracy: 0.9735

 17/235 [=>............................] - ETA: 1s - loss: 0.1070 - sparse_categorical_accuracy: 0.9713

 25/235 [==>...........................] - ETA: 1s - loss: 0.1134 - sparse_categorical_accuracy: 0.9689

 33/235 [===>..........................] - ETA: 1s - loss: 0.1175 - sparse_categorical_accuracy: 0.9682

 40/235 [====>.........................] - ETA: 1s - loss: 0.1157 - sparse_categorical_accuracy: 0.9682

 47/235 [=====>........................] - ETA: 1s - loss: 0.1149 - sparse_categorical_accuracy: 0.9682

















































위의 예에서 TensorFlow Hub의 `hub.KerasLayer`는 `tf.saved_model.load`에서 다시 로드된 결과를 다른 모델을 빌드하는 데 사용되는 Keras 레이어로 래핑합니다. 이것은 전이 학습에 매우 유용합니다.

### 어떤 API를 사용해야 할까요?

저장의 경우, Keras 모델로 작업한다면 하위 수준 API에서 제공하는 추가적 제어가 필요한 경우가 아니면 `Model.save` API를 사용합니다. 저장하는 대상이 Keras 모델이 아닌 경우 하위 수준 API인 `tf.saved_model.save`가 유일한 선택입니다.

로드의 경우, API 선택은 모델 로드 API에서 얻고자 하는 결과에 따라 다릅니다. Keras 모델을 가져올 수 없거나 원하지 않으면 `tf.saved_model.load`를 사용합니다. 그렇지 않으면 `tf.keras.models.load_model`을 사용합니다. Keras 모델을 저장한 경우에만 Keras 모델을 다시 가져올 수 있습니다.

API를 혼합하여 구성할 수 있습니다. `Model.save`를 사용하여 Keras 모델을 저장하고 하위 수준 API인 `tf.saved_model.load`를 사용하여 비 Keras 모델을 로드할 수 있습니다. 

In [13]:
# Mix the two APIs: save with Keras, load with the lower-level API.
model = get_model()

# Saving the model using Keras `Model.save`
model.save(keras_model_path)

another_strategy = tf.distribute.MirroredStrategy()
# Loading the model using the lower-level API
with another_strategy.scope():
  loaded = tf.saved_model.load(keras_model_path)



INFO:tensorflow:Assets written to: /tmp/keras_save/assets


INFO:tensorflow:Assets written to: /tmp/keras_save/assets


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


### 로컬 장치에서 저장/로드하기

원격 장치에서 훈련하는 동안 로컬 I/O 장치에서 저장 및 로드할 때(예: Cloud TPU 사용 시) `tf.saved_model.SaveOptions` 및 `tf.saved_model.LoadOptions`에서 옵션 `experimental_io_device`를 사용하여 I/O 장치를 `localhost`로 설정해야 합니다. 예를 들면 다음과 같습니다.

In [14]:
# Save and load with the I/O device explicitly set to the local host.
model = get_model()

# Saving the model to a path on localhost.
saved_model_path = '/tmp/tf_save'
save_options = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')
model.save(saved_model_path, options=save_options)

# Loading the model from a path on localhost.
another_strategy = tf.distribute.MirroredStrategy()
with another_strategy.scope():
  # The load options carry the same `experimental_io_device` as the save options.
  load_options = tf.saved_model.LoadOptions(experimental_io_device='/job:localhost')
  loaded = tf.keras.models.load_model(saved_model_path, options=load_options)



INFO:tensorflow:Assets written to: /tmp/tf_save/assets


INFO:tensorflow:Assets written to: /tmp/tf_save/assets


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


### 주의 사항

한 가지 특별한 경우는 특정한 방식으로 Keras 모델을 생성한 다음 훈련 전에 저장할 때입니다. 예를 들면 다음과 같습니다.

In [15]:
class SubclassedModel(tf.keras.Model):
  """Minimal `tf.keras.Model` subclass that wraps a single dense layer."""

  # Name given to the dense layer created in `__init__`.
  output_name = 'output_layer'

  def __init__(self):
    """Creates the model and its only layer, a 5-unit float32 `Dense`."""
    super().__init__()
    self._dense_layer = tf.keras.layers.Dense(
        units=5, dtype=tf.dtypes.float32, name=self.output_name)

  def call(self, inputs):
    """Forward pass: returns the dense layer applied to `inputs`."""
    outputs = self._dense_layer(inputs)
    return outputs

# Instantiate the model without ever calling it on data.
my_model = SubclassedModel()

# Saving at this point is expected to fail; catch the error and show its
# message instead of letting the exception abort the notebook.
try:
  my_model.save(keras_model_path)
except ValueError as err:
  print(f'{type(err).__name__}: ', *err.args)





ValueError:  Model <__main__.SubclassedModel object at 0x7f255805b340> cannot be saved either because the input shape is not available or because the forward pass of the model is not defined.To define a forward pass, please override `Model.call()`. To specify an input shape, either call `build(input_shape)` directly, or call the model on actual data using `Model()`, `Model.fit()`, or `Model.predict()`. If you have a custom training step, please make sure to invoke the forward pass in train step through `Model.__call__`, i.e. `model(inputs)`, as opposed to `model.call()`.


SavedModel은 `tf.function`을 추적할 때 생성된 `tf.types.experimental.ConcreteFunction` 객체를 저장합니다(자세한 내용은 [그래프 및 tf.function 소개](../../guide/intro_to_graphs.ipynb) 가이드의 *함수는 언제 추적되나요?* 섹션 참조). 이와 같은 `ValueError`가 발생하는 것은 `Model.save`가 추적된 `ConcreteFunction`을 찾거나 생성할 수 없기 때문입니다.

**주의:** 적어도 하나의 `ConcreteFunction` 없이 모델을 저장하면 안 됩니다. 그렇게 하면 하위 수준 API가 `ConcreteFunction` 서명이 없는 SavedModel을 생성하기 때문입니다(SavedModel 형식에 대해 [자세히 알아보기](../../guide/saved_model.ipynb)). 예를 들면 다음과 같습니다.

In [16]:
# `my_model` has not been called on any data yet, so the low-level API still
# writes a SavedModel here, but one without any traced signatures.
tf.saved_model.save(my_model, saved_model_path)
# Load the SavedModel back with the low-level API to inspect what was stored.
x = tf.saved_model.load(saved_model_path)
# Bare last expression so the notebook displays the (empty) signature map.
x.signatures









INFO:tensorflow:Assets written to: /tmp/tf_save/assets


INFO:tensorflow:Assets written to: /tmp/tf_save/assets


_SignatureMap({})

일반적으로 모델의 순방향 전달(`call` 메서드)은 모델이 처음으로 호출될 때, 흔히 `Model.fit` 메서드를 통해 자동으로 추적됩니다. 또한 입력 형상을 설정하면, 예를 들어 첫 번째 레이어를 `tf.keras.layers.InputLayer` 또는 다른 레이어 유형으로 만들고 여기에 `input_shape` 키워드 인수를 전달하면, Keras [순차형](https://www.tensorflow.org/guide/keras/sequential_model) 및 [함수형](https://www.tensorflow.org/guide/keras/functional) API에서도 `ConcreteFunction`을 생성할 수 있습니다.

모델에 추적된 `ConcreteFunction`이 있는지 확인하려면 `Model.save_spec`이 `None`인지 확인하세요.

In [17]:
# Prints True while `save_spec()` returns None, i.e. before the model has been
# called on data (the tutorial uses this as the "no traced function yet" check).
print(my_model.save_spec() is None)

True


`tf.keras.Model.fit`을 사용하여 모델을 훈련하고 `save_spec`이 정의되고 모델 저장이 작동하는지 확인합니다.

In [18]:
# Global batch size: the per-replica batch size times the number of replicas
# that `mirrored_strategy` keeps in sync.
BATCH_SIZE_PER_REPLICA = 4
BATCH_SIZE = mirrored_strategy.num_replicas_in_sync * BATCH_SIZE_PER_REPLICA

# Toy dataset: one (features, labels) pair of identical 5-element float
# vectors, repeated `dataset_size` times and then grouped into global batches.
dataset_size = 100
dataset = tf.data.Dataset.from_tensors(
    (tf.range(5, dtype=tf.float32), tf.range(5, dtype=tf.float32)))
dataset = dataset.repeat(dataset_size)
dataset = dataset.batch(BATCH_SIZE)

# Fitting calls the model on real data for the first time.
my_model.compile(loss='mean_squared_error', optimizer='adam')
my_model.fit(dataset, epochs=2)

# After training, `save_spec()` is expected to be set (this prints False) and
# saving through the Keras API should now succeed.
print(my_model.save_spec() is None)
my_model.save(keras_model_path)

Epoch 1/2


1/7 [===>..........................] - ETA: 3s - loss: 12.1411



Epoch 2/2


1/7 [===>..........................] - ETA: 0s - loss: 11.6855



False
INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(5, 5), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f25341793a0>, 139801521600336), {}).


INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(5, 5), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f25341793a0>, 139801521600336), {}).


INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(5,), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f25701bfac0>, 139798770240176), {}).


INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(5,), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f25701bfac0>, 139798770240176), {}).


INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(5, 5), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f25341793a0>, 139801521600336), {}).


INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(5, 5), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f25341793a0>, 139801521600336), {}).


INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(5,), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f25701bfac0>, 139798770240176), {}).


INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(5,), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f25701bfac0>, 139798770240176), {}).




INFO:tensorflow:Assets written to: /tmp/keras_save/assets


INFO:tensorflow:Assets written to: /tmp/keras_save/assets
