diff --git a/litellm/litellm_core_utils/llm_cost_calc/utils.py b/litellm/litellm_core_utils/llm_cost_calc/utils.py
index 65e77f014a..785976ed31 100644
--- a/litellm/litellm_core_utils/llm_cost_calc/utils.py
+++ b/litellm/litellm_core_utils/llm_cost_calc/utils.py
@@ -354,7 +354,7 @@ class PromptTokensDetailsResult(TypedDict):
     image_tokens: int
     character_count: int
     image_count: int
-    video_length_seconds: int
+    video_length_seconds: float
 
 
 def _parse_prompt_tokens_details(usage: Usage) -> PromptTokensDetailsResult:
@@ -400,10 +400,10 @@ def _parse_prompt_tokens_details(usage: Usage) -> PromptTokensDetailsResult:
     )
     video_length_seconds = (
         cast(
-            Optional[int],
+            Optional[float],
             getattr(usage.prompt_tokens_details, "video_length_seconds", 0),
         )
-        or 0
+        or 0.0
     )
 
     return PromptTokensDetailsResult(
@@ -415,7 +415,7 @@ def _parse_prompt_tokens_details(usage: Usage) -> PromptTokensDetailsResult:
         image_tokens=image_tokens,
         character_count=character_count,
         image_count=image_count,
-        video_length_seconds=video_length_seconds,
+        video_length_seconds=float(video_length_seconds),
     )
 
 
@@ -561,7 +561,7 @@ def generic_cost_per_token(  # noqa: PLR0915
         image_tokens=0,
         character_count=0,
         image_count=0,
-        video_length_seconds=0,
+        video_length_seconds=0.0,
     )
     if usage.prompt_tokens_details:
         prompt_tokens_details = _parse_prompt_tokens_details(usage)
diff --git a/litellm/llms/vertex_ai/multimodal_embeddings/transformation.py b/litellm/llms/vertex_ai/multimodal_embeddings/transformation.py
index 2cb2ac9ed8..d82c2bebb7 100644
--- a/litellm/llms/vertex_ai/multimodal_embeddings/transformation.py
+++ b/litellm/llms/vertex_ai/multimodal_embeddings/transformation.py
@@ -265,7 +265,7 @@ class VertexAIMultimodalEmbeddingConfig(BaseEmbeddingConfig):
                 image_count += 1
 
         ## Calculate video embeddings usage
-        video_length_seconds = 0
+        video_length_seconds = 0.0
         for prediction in vertex_predictions["predictions"]:
             video_embeddings = prediction.get("videoEmbeddings")
             if video_embeddings:
diff --git a/litellm/utils.py b/litellm/utils.py
index 45090a78e2..535ab25782 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5519,6 +5519,13 @@ def _get_model_info_helper(  # noqa: PLR0915
                 input_cost_per_image_token=_model_info.get(
                     "input_cost_per_image_token", None
                 ),
+                input_cost_per_image=_model_info.get("input_cost_per_image", None),
+                input_cost_per_audio_per_second=_model_info.get(
+                    "input_cost_per_audio_per_second", None
+                ),
+                input_cost_per_video_per_second=_model_info.get(
+                    "input_cost_per_video_per_second", None
+                ),
                 input_cost_per_token_batches=_model_info.get(
                     "input_cost_per_token_batches"
                 ),