From 539f1661663edadc2ef4b61e632eb2e62b4ac775 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Thu, 26 Dec 2024 19:05:27 -0800 Subject: [PATCH] Support budget/rate limit tiers for keys (#7429) * feat(proxy/utils.py): get associated litellm budget from db in combined_view for key allows user to create rate limit tiers and associate those to keys * feat(proxy/_types.py): update the value of key-level tpm/rpm/model max budget metrics with the associated budget table values if set allows rate limit tiers to be easily applied to keys * docs(rate_limit_tiers.md): add doc on setting rate limit / budget tiers make feature discoverable * feat(key_management_endpoints.py): return litellm_budget_table value in key generate make it easy for user to know associated budget on key creation * fix(key_management_endpoints.py): document 'budget_id' param in `/key/generate` * docs(key_management_endpoints.py): document budget_id usage * refactor(budget_management_endpoints.py): refactor budget endpoints into separate file - makes it easier to run documentation testing against it * docs(test_api_docs.py): add budget endpoints to ci/cd doc test + add missing param info to docs * fix(customer_endpoints.py): use new pydantic obj name * docs(user_management_heirarchy.md): add simple doc explaining teams/keys/org/users on litellm * Litellm dev 12 26 2024 p2 (#7432) * (Feat) Add logging for `POST v1/fine_tuning/jobs` (#7426) * init commit ft jobs logging * add ft logging * add logging for FineTuningJob * simple FT Job create test * (docs) - show all supported Azure OpenAI endpoints in overview (#7428) * azure batches * update doc * docs azure endpoints * docs endpoints on azure * docs azure batches api * docs azure batches api * fix(key_management_endpoints.py): fix key update to actually work * test(test_key_management.py): add e2e test asserting ui key update call works * fix: proxy/_types - fix linting erros * test: update test --------- Co-authored-by: Ishaan Jaff * fix: test * 
fix(parallel_request_limiter.py): enforce tpm/rpm limits on key from tiers * fix: fix linting errors * test: fix test * fix: remove unused import * test: update test * docs(customer_endpoints.py): document new model_max_budget param * test: specify unique key alias * docs(budget_management_endpoints.py): document new model_max_budget param * test: fix test * test: fix tests --------- Co-authored-by: Ishaan Jaff --- docs/my-website/docs/proxy/customers.md | 4 +- .../my-website/docs/proxy/rate_limit_tiers.md | 68 +++++ .../docs/proxy/user_management_heirarchy.md | 13 + .../my-website/img/litellm_user_heirarchy.png | Bin 0 -> 27913 bytes docs/my-website/sidebars.js | 23 +- litellm/integrations/prometheus.py | 8 +- litellm/proxy/_types.py | 48 ++- litellm/proxy/auth/auth_utils.py | 9 + .../proxy/hooks/model_max_budget_limiter.py | 30 +- .../proxy/hooks/parallel_request_limiter.py | 7 +- .../budget_management_endpoints.py | 287 ++++++++++++++++++ .../customer_endpoints.py | 9 +- .../key_management_endpoints.py | 21 +- litellm/proxy/proxy_server.py | 236 +------------- .../spend_management_endpoints.py | 4 +- litellm/proxy/utils.py | 58 ++-- litellm/router.py | 2 +- litellm/router_strategy/budget_limiter.py | 84 ++--- litellm/types/router.py | 2 - litellm/types/utils.py | 28 +- tests/documentation_tests/test_api_docs.py | 5 + .../test_router_budget_limiter.py | 36 ++- .../test_key_management.py | 65 ++++ tests/proxy_unit_tests/test_proxy_utils.py | 87 ++++++ ...test_unit_test_max_model_budget_limiter.py | 6 +- 25 files changed, 764 insertions(+), 376 deletions(-) create mode 100644 docs/my-website/docs/proxy/rate_limit_tiers.md create mode 100644 docs/my-website/docs/proxy/user_management_heirarchy.md create mode 100644 docs/my-website/img/litellm_user_heirarchy.png create mode 100644 litellm/proxy/management_endpoints/budget_management_endpoints.py diff --git a/docs/my-website/docs/proxy/customers.md b/docs/my-website/docs/proxy/customers.md index ba9ecd83dd..2035b24f3a 
100644 --- a/docs/my-website/docs/proxy/customers.md +++ b/docs/my-website/docs/proxy/customers.md @@ -2,11 +2,11 @@ import Image from '@theme/IdealImage'; import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# 🙋‍♂️ Customers +# 🙋‍♂️ Customers / End-User Budgets Track spend, set budgets for your customers. -## Tracking Customer Credit +## Tracking Customer Spend ### 1. Make LLM API call w/ Customer ID diff --git a/docs/my-website/docs/proxy/rate_limit_tiers.md b/docs/my-website/docs/proxy/rate_limit_tiers.md new file mode 100644 index 0000000000..e7dc075bd1 --- /dev/null +++ b/docs/my-website/docs/proxy/rate_limit_tiers.md @@ -0,0 +1,68 @@ +# ✨ Budget / Rate Limit Tiers + +Create tiers with different budgets and rate limits, making it easy to manage different users and their usage. + +:::info + +This is a LiteLLM Enterprise feature. + +Get a 7 day free trial + get in touch [here](https://litellm.ai/#trial). + +See pricing [here](https://litellm.ai/#pricing). + +::: + + +## 1. Create a budget + +```bash +curl -L -X POST 'http://0.0.0.0:4000/budget/new' \ +-H 'Authorization: Bearer sk-1234' \ +-H 'Content-Type: application/json' \ +-d '{ + "budget_id": "my-test-tier", + "rpm_limit": 0 +}' +``` + +## 2. Assign budget to a key + +```bash +curl -L -X POST 'http://0.0.0.0:4000/key/generate' \ +-H 'Authorization: Bearer sk-1234' \ +-H 'Content-Type: application/json' \ +-d '{ + "budget_id": "my-test-tier" +}' +``` + +Expected Response: + +```json +{ + "key": "sk-...", + "budget_id": "my-test-tier", + "litellm_budget_table": { + "budget_id": "my-test-tier", + "rpm_limit": 0 + } +} +``` + +## 3. Check if budget is enforced on key + +```bash +curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-...' \ # 👈 KEY from step 2. 
+-d '{ + "model": "", + "messages": [ + {"role": "user", "content": "hi my email is ishaan"} + ] +}' +``` + + +## [API Reference](https://litellm-api.up.railway.app/#/budget%20management) + diff --git a/docs/my-website/docs/proxy/user_management_heirarchy.md b/docs/my-website/docs/proxy/user_management_heirarchy.md new file mode 100644 index 0000000000..5f3e83ae35 --- /dev/null +++ b/docs/my-website/docs/proxy/user_management_heirarchy.md @@ -0,0 +1,13 @@ +import Image from '@theme/IdealImage'; + + +# User Management Hierarchy + + + +LiteLLM supports a hierarchy of users, teams, organizations, and budgets. + +- Organizations can have multiple teams. [API Reference](https://litellm-api.up.railway.app/#/organization%20management) +- Teams can have multiple users. [API Reference](https://litellm-api.up.railway.app/#/team%20management) +- Users can have multiple keys. [API Reference](https://litellm-api.up.railway.app/#/budget%20management) +- Keys can belong to either a team or a user. [API Reference](https://litellm-api.up.railway.app/#/end-user%20management) diff --git a/docs/my-website/img/litellm_user_heirarchy.png b/docs/my-website/img/litellm_user_heirarchy.png new file mode 100644 index 0000000000000000000000000000000000000000..63dba72c21d169c9cf21fb6bd6965747a6762d1b GIT binary patch literal 27913 zcmeFZXH-*N6gC(jh!F$@6_u(45vhWT)QAm2x6+H!q)QDg2}R}QRVhKLKx{N=N|ly? 
zg$_}w5Fk<`1PDEZw7Gewd~1HpTC=`2Yt8(aA12;=bI(2J+;jGR_OqW&qLqcw!2>4` zfIy&waO3N@K_IXo2*kI4|32VM1;jTK_<}q(whab>1jV@jctAP1$AObP!MBaBf-3qY z7l8w?r=hta2=qBwkokZg1oF*?UpKrP&a=$&`Q$MlArnsW>p51VeCpTrx_3AC(fQA$ z86T9<Tn|=RG)#ARqlWs~6_VAZRJ<5j(Q!UApjEHw&qG)wHzg^4qw}-wmUdclrJnq*%2x{kc zVu;F``YI=C41Hro6{{-?MT4q9dtA+}ku6@D- zl6z(W{&O&Dr#bcP?rw+t0hf$Pjjbn(?|xw>cc7KHqgL2$>K=Ru?Cyt%}R_c``v?-?|G!W>;X5YyHfy}EwF_=`5cRBKWo9{)R z06l2708Tx%oAN5;qZWhxh;(B ztU88|d;~36vsgvxhe5%Xz)7vhD=_x%zTOhQYatjR5GA0WgeDUD8p@pLaedBg;%hb3+E;lb!?^Cz`f5VjnPh8y>Gz@#S3hUd6c?)~+vqOw%YZ7@g(vd_fDha)7`pmfJS?88*SZ&B(%e2wj=4j{57cVnk zhmO?1Uu@ME!OY7m@qM%IX5*`S(a}Pn81s#jpy&n7%9b4%%M$pUd0zQBP#U@QYc%Mi ze%2A2pd6xqQ~B#NgY9is7`3LV4Rf{eZI5a6A{c9Nbu%78ZlVeBEvz<0=hF}*B!b2= zRcjgQBVr3-XtxG&hsTCmj<`y%s)O>R`CEfz20VFus@J}!JJN?GkR~%pV?}I4pk+*V z-e%k)zG6wqbm;86Xuk9GU`+t&D<_r-1G%g!UITiG=@)Ws(ETIB`fDTrbmdQj&qvPI zpd~_{Cjwi@^h!)Qwx1sD(3s*C)X`!hDSSp`yzRQ7?(tr=smWwDpU#Fs&&Nv@MGDP*{$XDj%Phb9l?POP9dHu(KuftyPD{;CC%Ny>B1LR}Xm%)OyC&9PAkbxv8fD-Oze0gU^~l1}uvW`8 zYz8Q_1EmcYb=zB7t9)j^EJ4#v8J!(?I&AXpjm~|CMfIoIp1I0@MTReDppKsNx?NQX zmxZ?Rf+VH5TQ*VDjocpveelVTShtgeXmkyq-QCAn z79u<`ay~?#i-m_ln@W8|+Deb#G~lZq`6exes3Qi%u2?qO!%Dt=i_V}8N4{}V_+n9Y ziU;8~HXL~h>)sfhL1r^&=dKr)5cUSmmSjI~W!mPQ95FDrvn=e|+Gd5z_z*W8>yQn8 z%jo;Wwtc_x_*z1=$29s%Wq|-_6Amo7t~eD{#E#f;TF=ps*Z(w-^Y0J|?Es*8V=gmk~h4#UMVahe*hHI$2HIP4%<;FV8cBbV>Cbf$Xz-QsP!h`5MmTlj)6~{ zJPi6{tbUSQO zu|B{#tbuEn74pL$58qn+mU_SX4H(P&K-DIIY4w&0SHHATUT){~8l`v%<6fWvdtbj0mH9u#Td$ju!*-UY~;hn=bjjz>OHJe7dJ=W8@qxUQe`@{AGlpb16+3 zm~_9Kt?Cn|S5Lvvpsag#$3W_XK?2%|aeAVEROTmw369Zut)bDL4GWGvlhn^OkBzOy z16opI=)YZ)9e**$ip)4;1XNfcdy@den_hzM$8U5ii5Hd68JcIZIwQ~2=GgelbZizJ z{bpjXVP@xV7xW6S({Ad>I_<=~f~3$4Ym$4EH(N3jD%9R%--lAE#jRhUBO4YAUpJ0G z5`1ELL4(GCx6j`Bc}=ADxoYFzPlNORL2+Nft&q9R-*tZW)+Q;SW?qI_aNotzGNmiM z%@I(SiVxT0IN*FpxOJ5!Fe6i^h$)1Rp@umN_34H|d@GL?FB$$-I7rLrw`*vsSgD5O5w%-mK7xs(?_vDmK(%uPTXszNhir z;Z*lIWL+ut5pRs`DFEd4nxu@1bXuQo^NJ()rhzozPY%?{8HO6z!+9TSLta$pDr*t5 z=dSbqN{0$oSt{f{{(1`QsVnyVOmVXECgA8-3`%dm7*7Uw7p%6A1~#s@OEiVvO+f=| 
zPxZ_#%;ggT9a^O5U|OzM^#t_D3<6C!ZOewSqX7_k&NbCVmitl>X+|Ow2FHAedR?I~Ljnf@_1_@4MlryD&?e zE{gD&IfP3I>*I=KWzih@Hms?8@mj1~_R50@!TrgHZCi?Nu2d=!t3u%PO4LA=X{qQUAdnxX2=z(TfkGYGp zyO7d7xkDf3-r)5YqyEf1S95Y$okks_N8Ia3u68R<#xm(*=R$4%I@?3$w%x7r3qda!a^_IO+g(sl) z9&X<>{p@(hAn^FQcEnKXJ z8dnwSIOIaSZf#@}l$j!(9A3}@H{#LEQ#OuytvgJST3s{#_;&9M{4p+rWjydf$D<7M zlx@sYW?`oTP5gA%G^qq?3%Zi50S0;4@lwDckXKsoT))blLM&-2|A96%wPb)`DoVFC z0;w%DRp~S@ZE5>kqWr9WNgkIGAY@VBEL1sbanIAwQaBO?sHyHR_mkv zfJgkF336(k4`j<0v9=}{+%G1~XKzV4wxtqy<_ep7qL z!Zw^eQh!pt=)VB%e_Y_kUf9TwJi$v8XXo$?;I}|4hakcm#+q&rK5lqtiRIS~YfjYz z|M!4TfB0l{xeGvP;^{S#iBuka9%U-e;m%e!Yp=d0Y>E=U4A^2j>15t5Crbo9k zy;}kvkGT?`z4p5vH;iMx45&$)+NrB!RI)!hG7)SGMhYk>%&jJ#Phct>%Z3B?}7vrxHY}{gR4C`l>T?0*#1i1jN2ITUr zg}KqQ!UY%3djpw`&@1q`KNwThMNyE9N!nJb*=u@xVm$C^u-hh5mu8^@#IDs`{O8dd z1c?(={qn{!di{r(7}&HcXkbQ`U>jEQ-B486_7X^4m)rb4=s%>ECGmc+gn zHOoTcpa(`IhDb=-6QC><@M6?|39WvHW{Jj|9Qyl{y2gEwQ3CMZeN((-{>)U&mOD>J)rY1-)VW;~0vfh?_Exwkjr(XM_qBYtN~r!} z^#b%M61e-PQ0Qef?z5ESfL%7BF*F^`9g;GalxcWaeC^D6;7ADA^6HyHp`G5`&H`;@ z-GXe0ZWUxVycM=px^|vdy^?!U@tKaBCh&`x2yR)jg58~bo`ARGu0*2i>!UbrB=Z8*}IkG*Ls2#w(NO}SN^*wt{>T5f9 zDvY?kfZ|bn?Q#%s1Eo5Egj_l}{vuK*pj`d>O#%YW;Y8 z=}?{Ywew%Q;m2CLxxKS_rt@SGSW}mXv3vsK0BCLaoy!3tktJSgGa=$MV68Vybhn4@ zG?D68i1qcQ6^$SBqE;JLrW0gVag4PDnBI9CQWGMe3Bab}nMeTtg_RHaH?Oyc{kkC@ z+GFNZ2WE~sMfqv<0lRAy$p*reI@e_Vx~Tr3u+@n%gY6Q_PzsfzAebLMlehy!p{f>{ z?(X!l_GRl4&R0RT8UW3FdR^!iRnffB?LW1A=R?^k^0oPcz5i+fv}UVSf_mNtA{&4d z?bptqN$L4!^`gv(cM1F*k|8!k3K=wC0k9(_ZkcHyuXw0_BFL0BHs*u;XFen~XfKDkgX(UHLa2jm|t$OtZb8zxib$$EPu(iVfbZ)pvy-fkIMP z#n%l-vi)8_uVZWg90()tA|OEvw8S}e#6t4zD_WqZV4yr?un)4W#rYG-D%GG_medX>-(5#O&*aeT-gBLI@+p?MffslaOD*e>H=_<$t>3 z^sKJ}0Gf2KP66w6uT`1;d0(r8X=&_4s2({tc)UviJ5hmW@2#Fb2Z;k8N;cSg6b1CB zkpM8Tk@CMytY|>|6^b4m);XIP&nzcXIWIK(|MFtlYcG1S;8{rTX6GdB^TaT0Am zt~cn6+#X(en;^Xuw-xwD&nq8Sz{`lvm(}k|@U_o)J#s+Xx_sLLm44zb{A%;wQt2dF z4Y&J{W_%i$a6HyXI^qNI8T14Ys>ow}i^13IOz*;ZdwEp^K?^Bwg&*Ly?IGK`D&}kH z(5vRIXWQwoTEiPQUV&F3S(cZlpR6M7ilN~an#?V^6zY4@RX=$Pz4h;^$Eo#~I~l&F 
zQwnD-H%=4M^*VJ1GTXf6Au0C{CVAOvSh$X_54k4ATOcX`Z0u2&v+dBs=dvH2y}2hUX>Ifjk`;`AVdej^ zP|pj7=WLM~fr?*)1|G_87FNb4N6*PkYE&^z96pfZ%If!y0~91mUg5q{kMUJyq?nu3 zSVFTZ9JPfeYno$tY|-%NalY5;R8-DkF2veCxB!kh;aPmmrB)cQPHekGnN2UVCF;N_iH2AF)?Th&Pdb zLCRc8eoNwgXQz7C=V_putnbCNCx zj1PeZA2wLrJNRS2x;`-Nb%3Gc9jss{%r_7K<7ba`<7qwS3QWfcV98zf0K4VO^GpZ4 zz;#`P@xaAffW>o}IssgyV3a|Zy81>H}`-785CepO50rPBOo3A#xY_( z-P9uZPit%)f;ttpR8mwIv9b0|1VJF@NBMI(7h31}zi8Ymv)2+5JyN5eCCN;^nteG^ z0p1|r%Mjj}T1tJ@5@IiBM1U@1H+cNt)I42;s>D`%zJAoWCP7aV*o9!KJP_ZbrHk^y z#{XTg{OWQMlL9||waO)o+y6tr2z{GPBqu^g9pm2~PrF|1R`&LIa`S{CY9>)({Bwsi z(O@GrxkgGiECfqtW*Qm5*1wAp0pm4^%y+q1DHHkk?J5$T{~4UC*1UFA3*G-ZyJD}V z_{tWrzn2XN@8ib79rR{cK%LwsZg=MSE4JP&vI{Ym(I|(F*wjf(uy%?luwMXR z8L6eeiZS>yGtYftQ%`L>!nvIMM9kuEhGS1(U@u~tx8Mh`#ST$G0-=c}$`A6%eA3FI zkw{LFiv_9?R^=iXJ!7SKMDwiFBxkzWa})Z;RBRjUPmLf%jhkyt4)H&OxNQ~_@M}-i zLJS}1?f!{h!7)}Tyn_JsS!?g#;678OX@1e&QZ{?BY8jjYKO+CAd?4QB$ldC_-q$pU zq-H?fc?{S%CcDEX!+8^$Qi1I9jL7rrL)aSeRg-k>9{)25(2|p zAHob)4q|fsp8uZYH(hFTaGV;r3+r1k>fbc|@4 z`T&#MILZ=+o0h-F9Mnc>8-zM|!zeTb3U}7W7oeajeOv3DiG+jsVUNWMr8hG7q?z^PJ30^&_(ja7v6M!+Qh^I&b)ZAAMALp+9gHHyY9)h}9i9 zx;x**&RYdHLYSCG$!3{vF2#CfL`_AILu1`W(zj{$M~9dYn@u`5+WXP;bF4zy^Y-TM zM1ujPth#Y@tPWfO$-uL>D-n=?%QP`hx_547*v_kuB`-UhXSzEC{x~|xQ1)!r|2#x? z&0YXp_p+0McIum!k+hXwMtbqF<7554&L~x+xMRq#8N%5PMOZxh5^I&p_8-0dCh1O{ z*X;(wDNo(aT&(SgEfG6U{xWxbVY5fI$@g>U*9ym`V`*x)C?DBJ2Li4p-9W9T&vjB} zlVLdfn=Iwu;ed@&s&R<>Z%T|XysYnrcuC(}!0pa<&;1^rTnC5ETCDC+NhE&n0<@W? zGi;55M)aQVsF{cUwvGT zkG!EDdaAi!uKc8iB|y2pxdcf8Z;a(L+3ryv1D*mKhyKBjqC0^ryT|d&26ugNH>|z? 
zx8BC)!9fdLOigW@zc_KDB6Zyd{%4DZn=8MseJZu!@EJUZPDQrR;esT1>2^!dY z+Fvh@It-dyv((FOCJon)IyL<^y4EfF@x}oOBP>5i&g-nJ5@8JjaP(qsg48rOGs`zY6hQ&&}s3=ff7VJ;M;$ zr5*-t{^_ToKYN*w1+o3sr7`K9&9)Z9SkDs>-s%^2mT?58a;XPit^5GSs&h|ix3Xzq z*!yjzv+yC?0P{2w))&qH+3S}TOKr=&u_!;%JFrn%j*;@_GBIs-h?GAx2D~ymHzH-% zjaZ#$3jbtau2x@ONl*+MKyd24@`rB;a*mRZk)FISiBTj7$R%<_X+|X@uS~ za#Wlmv6|5JtX=}sjFCs8=k3U_zoGx~eu27f1xu5qxznyP z44Wuw6p1+em)umQz%!GK+IsIVA^HkUYa*@Sdcx+Jd0y4bp&rl4Y<4ndH<^fKFJkMk z96Hw7VKlHqYE(smk?Pma42U8U8~yrqs$q*(nms)lsKrcF39FC9?3>v;jinWpvV}fY zJ^^>t>es57BBwHAzTopGx+-05JE&Av3x2e`Y3EeeOUX^0Bp|!#fS(!t#A%XkVVHu{< z)&sHxbswmpO%kB55Cji;y? z*>a%)ZLOUm~@0jJ_U?i3LiGfi%=T&hq?ji@4A!?(O(H}mTBXAjNpumIFi6_qEuo8wcln8W76tCcnZ znPofa4;R~PJ@bb9VH{>#^U*Bo@6WR-bTn%e-I5p7t+?`3-02ZFEVf>HJm#JmIyf0Y z&Wjg~H`DazNm==~2DLj>WH2oC?VF#a-!-Ixbl*F*9#cxZom2Ns)#kOYKae_rh=vGc zdx)Mu)ac*n>uPez3E-^!(KR+MIRG|%-3~bRPde$BzfdZg0G41u&yGA$4!E*Jh5RpB zot-=P@;F=`Ty@Rzv}`PE*xGs0LU*O6tvTC3UI@rLInfLZLrsnVNvn5lRhpcm)(*e; z$DKKQqfKO=%9gOeeDI7bWTIL{5z)y6AXlg#feV}bEaOe(0zEyd5xocAlmi)LXT) zJf>;m^nu@SwcDthZ*Q~g1K-#2eDAiDdG`}sAZh;`%<+;^cXCQsokJosueWm%0a;&E zO84u{!79-2L%ANK#6#?wd-UzMamy-XUNA#BB4BFPGp{J}^VsoJ|^?nNY=WHa) zHA&lJJv45a{g z#q*tcDK)a{bEiPQFHWkw)#0;HNNi}zgI>k#8;*EvC%Kj2>F{!5v2yNu&bH-1Km~UD zLqMz^=7@2SY)_|LxlnszoY`mq28$9F))I2v32g5F)}@iQ{I?ReIXuZ3?vA!~LhDw^ zC>kC&rTkNqQT(CZ4t?zL1ut7r*}vRH?9Z1CjLOu$ac=^s~EVZidfp^hz=3?F693giCXc zP}b?KdIdwKohh$3C&&Tt(RHI6?U2-yb{a^iTU2Q-Z(-MeJa$jY&@TAi~HwuqK_3qeP-yA|L*@n2SJ?6O|D z6;BIRPbi~HR&2Y&CC(txUs#lY06V!kB*Dw^DRzN{fHj9(C>%&fjm$HYWm&M^ z9%s~jwR)kH%I5K&i}hZ>r<)UZzGk`cC}mDPzLQ*w=6v<>XkQg~_O0g-(L~4VLN_%4 zK^*Z15C*As{`jUl^(#xm>tl9)w)M|BQJ3MR99faV?jB&dKMwxWCO~v0?T!3tnrOf? 
zvCOov#*>UO3?;VljJh|U64bVJ19K#N>(>a5z5P@`Q&Ic?x!K@muqaptaBW2SqP&qQ z_JWp{#}%E}NDjlE(JEl4JO66mCy$0>;Y%eWxSiS~?plSUO%lTiA@!KR*+#3J5nkm0 zYCiGY=pfgd@()sO58SbI{PG!OnxgDdWg_}PG15^8`ll22s?olyH=JI{X`HO{v}9~m zIIgU~=xHxcq~NFZf;uWm*MfDN_U4_PjHN+kcQIf*E$W+u03CgMl)v{08P8#pgYWDZCl@B?y+Bz!sS{!^E%^Cc=Tnsn9TE@x^|z%rsCHTd4d2r z4H70lcXs#^d#E8WMHk~$Qq%renN_ig3{I(w<^&o6pO&B9&_7&u!%uxvPOKD2_-*Y4$R-BtxD^-$=iHDDAt*V)d-M_QM=YthoFBiCxuu{}?uq`g zt;~{eS@^f_klu@3aS_%C(Lu>E;9*5HJbKzT?b@3P!GZBZS+IsgE2Q|Nz{@nK;DRH+ zz=;5g^&RN3Bj}6jA7G%H6*=Nw_W;EX;_yZ%C;r91yuC$@XNozH*65F4u2i|K z!jJVf@d+HucRbP>-Sg;dYjmkwEJr|{#1!G){y&)q{J+W`0^Yjy*_UmR9djmAC4l{T zrwhH76b9AXDuO+7W{i|h?o~}~#LKYjcO=lew{|GL%*&#TNCT?3}*#2-44)-W&hWf!3GIm5XIg>phMOw z7(LIaa)=qRPh@`%fmM!uM2#j&6#W6Wqa)PSrO}xL^lAWWZYJ83HSziRAP!A_Ezt8f ziquBU-)mP|iPM~F+#S1y-iZ%eh)Pg7utyBJUs>DcJ!0PzP9~z5e-Is#8xdP%dV8DO z4@)Tr56sE!2$iVj7wpfAYO@og$vz{gu6f^oOeaAA&;TfDTF;nXVZZ=Aq{zN*^dNta zmH|Mw>ae%1aYWRn%fnNHdZFrH*c|EHinu?or6)$sUytJH32L>R?dRw*9CM3C92MR- zesK~?jiSA7+8&~1bJRo(rlQmj->RzM(CaxpGm#t^f<@}7pw(md{OhMoov{@|A@!^_ zwyNO(iByTCBur9zIkoYIXt$hZ%hRVi{)}*t=%`(Su(Il`sU2V&x8%qlTvE`4N4} z9iyfo&k^6Ty2o1;ei9hq4o(kD4;PiE%Y;0s)aN|;u_&rcvsc?f&QY5Slx`SP;=n{t z9D?zO0ba3|bO_sqq`ZL*79BN2x4@8V>+_Q&A0S>^_iS9mv07w8DShng=9$|Lk)vpT z@m%&D0kM-L3XZTLSI*C!%R%-$h0{ zO0{f>utzYbE?1}p7-NM%>QsQ-bV-wMWw(hqQ&yvmoX8fD(OWOD-p)G5Uy%DbTI?mG z=PJS`@`ndPI7P{f^&f zwK=*Z9eSuJmYJC;|^c#N>zTBsLZ7Mz%Wp~}-@mly+ zCg6k1Kip8ENk|MS@PT@Dx%oaf$Ol3+^)+!!Y&E>7fo)6GuS0Ha)rYkOuC4_<0skU} zuT_fo96mF=;raaubpIZa&Vm#igoXCjB~r33ue8mp)Z<+_v)KU@(KB$28#Lk7QD#6U zu9OF4lxe^nynD!#kjuOMqtx%5HmuYmJR47?uC9_(W3&+?V$nrze;BZdtX1}%e;-q1 znup|1?}_#*WUtL>2`l^G!5p!ysG8ykOzx;|-AMUxy$)};-wj=zugpk{uT+pX)Zjej zcn8pOSGods*`h3U<@qicy4ENCu=6;JCEud~iW&X|q)9K0RZ0rQp*Pn#BcZ3WKzk~% zhH{n~hdS!R64|%m<@o844JTau0y=8w#I#+(kyCjbAcVb({{V}1rs#m9Ywexww4gsL zHvUQPyrgi2^Ag6i5MOf+o`OtzDk67FsqE1otZu7>;nA<*Y-f6FprcRRa+Jxb4b;T} z%=9S71#P`#)o~PLgXhMlSpabm?c+>0@sZ(#`*`M$#b}34hL*1oN5>Cp)zrBfxo$#> 
zQ9Z?MTqGUc^Ex>?9{SR+Bxu8O6sd4U_%-+xMRL|+-t<7o5F^Yizxm!c2#G}T%0Od3$>_1)j zpTEW+BsWlOqT-_N#cps@>H5wn`n;dgmzjT+8p%d7$dDOYsAJX8*=6m0bataqpP-q1 z7jKqmL--ef-WDg3q4{NK4m2OI&#hYxD|N)i53ttzKqmFh>BJb6#v3QgMxS45wl*Il zTtpA5;h8}+9djW@Pk*zmKd#c_pW0kFHl2;gk>+iUo9{2(M$R#E>Nu0P$y?D*WU8_j zX|zEfJKBIM2@XP!8O=1e3L9La@h<@yD+Y&@=q9N}mVq{N-( zC3Ayk4r)!PU#{FC5Vm#71veb$a-GQQ1^tWJm*xdSod^+HREl1nQD5dZt=?;3`zku; z9FghtblCQ?C|{u^c)bbz9-wM6d?I_i7m{jFZ*iYyov z3td}_fA)hG9Q(7i6Gj@eV*UGTQyV!G=PlnEkZ#2N$#rFH&M_xpMMev7^bH_4q1)Mj z07R>kdZGROA?c4gs~Dq#EU%C5UvKVhWa20M?x2xlB$Iu2AG$>&F|L;^T!sRBFPf}y z$^mZR`VQa1OE;e0e{zom^Y+23a#rTG6@EG#nuF$fZ5Sk@PF%}be`#db={ek78F|%0 z#a*RI)p*Ixp^uNzCm1~A^fCJss5fn$uAMx8Q`_XL{^pSOW%3GJyJb%AwHvaKqD8c9 zbs+EQc#iIE7(kw}x45eMCUW2YDzWb!T$Xv)HjZ^*cTnd%@e+B#CN?-b`f>cuMIK#P>=p9dVU6d2NN|sbq`@m6dPTLTPUp_s! zWrVaVseyLxD3oB_t(7`oU0Zr|VgyjYcwEmr6kJ6WX?gYiibcLQggoc)O=YKtZ-jW* zYS{72t<+F*D&gau>s(1l!vS2TZc(LC+?KLd!G3u-d$^dAkrD3j4=ESUa3Z*XKk^3J^*#5d0Js0HzrsMPimAc z;L#qtuQFqUGrfdr8KW&-Ry@HjJ`-QHH#n=s9%)z+;H;AgXj8UXkVAAG%9 zkKdTwS)sD~{#0|V|4Lp!)ZjC78W+s&WN3*r3Gcn^aQ27oQaFtd1}FCf7~{VI_}Xy_ z>o*qy+nq-H;MWt>4jS}?xic<#H>LrAHLMN5?$JAB^!Dfq0)Yvoqb0MiW8NR%=g_;k2NaKXQ#MY8rNJ;CH^lKMoe;4h4I@)A9RQ z_|hz)Xe!8ru@hWb8@61U0M;R~SCfVJi3td2d+IszeqIluMpIr3LtI8Qfg@V&+e&Gn z@6zn*m$|FMs$VFkF&8a(%RC#QN88(wysZIfs*%K{$q(4E575?|7Bp6T8!NmKYY?ty zKo=PYCK-p|j9!al$S~Jx59aw7;5RAR{_i=L&Y0JK#J?mv`v6Ff)2e`1uzh@CZX!GQ zk@N|vQ{=6a57o(o2P!(G#42uVYu+7&Xmb>JZn-BaIil>&jjX+xMZ!-!#K-?Q`B0m2 zTk7=aHe99jnL!!lpQYjm#fFS79Q2nJQrC@<7X9<%;gJBM4?W;J69^9N8Qs=)jNJ}< z-Ob+it>P=_0Z0qUS509ckoOIa>dEo}Dpe|bObkjbFy2iso~YjtcJkHVURqyah_g+i z$d;VDQJlXOuDp4HpBVl1tld+C$XOTA=1cs^DMK`s1mw|^u@j#br|_G73E}a;*r9K-qh5r}s%X@t~+&^dG)*WiRkuehmYOMr0d{99b6#Up)ql^y_RsL5s_^Xj)qI(?HAoC%VCV{6bwfJ=HOmBO z2T&`AA--mHp^}mflU8oS64HA`KoY0Ob-B-jocbC(2DEDm6Vn|e=JZ%V&eTj#Yr#&g zr?bJ&x5OuAzCvOzL#JM~zJIv8swyB&xfU6E@RAp+M~a5=BZTxgO7J8ALhSp0SDUD- z7B2sza-%Tn`!^9-bNGOhMPX^gnQ~2qFQ0SQHYWK*^wf9t2B(4v`rM?rPr@DI9#G*B z+EYT~s&a_RwMmkhzUUU`bAgAo4bq=1+OLv-W(xizDF*ue2+j-r=Os{u@=hNZrdUzw 
zW@!H)0!SY}$b9Xt8mdarsj7ejw6t1Cj|{elL7tpf4)u5R&nr#$E8RpVnO0m|LQ3^~ zjyAw8C~^nj6I)QB5K{oKn@Rww13j=|vvmC!Air3hp}6&RoLK99-Sk%?Ftf?-@)p%< z4w-?zJeCX;aeRo%OADwgos#jluf{o;DbpM$j}~7yGPkAx?sQtiE3X#mI_qw(;e?*^ zOhK0Eo&$Zl6@+R4QWYY)liW0gK3Vc2E-5Qy)h^jVX0CCbxk+$lD#vczRDop>7v?KM zfr72oQ7Lxg;X%^Ke1)d>i3DOMoR|_i5k+lL+Kb>31ePj4W}8cMi|9JKw+z05wRuSs z>-;Dt5X>S~UY8`Eoaylims0$*}idw5Zay^^6D=C9w4Y zef;E8<3iIQXmXia8(I{4(ODH7lSva49erYT2n%}YO*0|}yMJaHO8ZMG z0Pyq_vO6?4(VUtXS5Vi0RwdAN$P>Ah!A=A=*&)|8gMuyS=D`dVf4?bzY$+E@)0zsO z{Z4s;@p%VS(zyn*+1o~SMcs<8&U^)wgRKwdK2)JO?0duOqmc6og}KG&f}O^e9h9LU z+~0T4$0ObJwY@y`$NF=@FhIvrrevEUn8p5KGNrKJQdE`^LMg8g!&)l>A@=iBpNfV; zT^_Pq{z}8nxK9&-n`%Lhxfwb>9_BYpvJ|~AyzvITT%~^z*~MP?`eAiq?zacgJ72P> zw|;8&Ajn-sWh5xSC*nN02wbh0*3(cLM||zyxcCXVE@@Gx$b-mXU>|>lJZH(5k;mVt z_4L!VDP8A_F+I6amO$PljH`g4221zXuw3+kshMj$HF_a^nZV%Iu_AZB0Xj#*dWq}3 zQQ3yozozfrVD@KomP#vE>q<}-4H0Q8`i$jjiN}Bf;qh@utMxz)TDw8r_%X54wl~qg zrOn5J-L-0+a9tZxVU&@U7MRbdjcriE`t?Ed6vp6EP!GQi2{W%St?GBE zzALj2P{J|aDA(A@|EIP!YAvnAjpyxBpqZl*zvKe8*{{WXR4zGMHbXG zt%lECm5q(8!;@aGM~a?ni0S%qBZCDLtrY2&jMkOi9U+lcQ5uH9*FbeSaPkD0rY%sR z^~T)AiM73zMpwG%|Irz_|GFsC)`NGW2HgAJZfAQmowejFAsO6zKEW}dLbK|HWc;f; zAGQK(_c}DoRT!gulomSQX8WW4kB?vE{Ti9e&BFo01BxB*+%1IgrK=juVfb`pf?h9f z0g5G$kGQRD76&C<&@UT^-`IjeLSe>+s_C;ldU<_+l`pe+b$NevFefXAjB6z&dk#xuRkzuX>v zc)s93L;rDFwH52&_1f;g3j3B$7NhloRes_VXCguOje2-5ztFM0x8e*x7)|=}L3Xp? 
zx@=I9?4UKZ+3RCwP9vr_0w3XG5pr)EJVV*M6MHE`ayVYrdvV0#=DojJc#sWbzq zNtgSvrR4;|&O+n6n{nM2O?lVtEcyw<^;hcv(F7p~i80_?vQr?>1Y&u6N?h>$^V*1H zK8E-7WdnjyVPCBkCww92Et*ZxK(C9VKl3P6uhdSgx}lKl&2r?tQaO@2t6h5GOzB<| zcx^cKkBvo8Xu?}jpyiKAjSYN^`@G((Kealvyby!R%|{$Pn=UGw(dcv> zJKMgLtBZ3bIa}t{YnpTH9z^>)S>#bQ(W26`KL6^FcIqSaoH-87cRf==&2Np0%uWK}io!f>PfN!z$KEVoIsGf+iHRpBpm%n8EVl7srW+&0|!a}B78^D6tq&CVH=QMj+ zT@yF28v6aCy7*afP9b*k7C=`RA1kx-KQkBII5jPt{9)dNFRXWs!&!Ru%Xq&MkdC3e zI@Njr*nZ^)Ml{R@ehtXJCWVF-Y^lIPbEp`F`c1Jx$R4j{n)Ro=tzo&Qr3rlUJ*x z?qiqf2DSodll}llz~ND^ayP!QRxVr(~e&r?+ws_Q}(dZ{Zb%*GU0DkrvwS z)2d%_$Z{l$axm*!x-zp>;G{Zvio*u(j4t2_K3je;P5Jj{*HaeU`k$ZU*`BI;P3kf9 zwC8a}`F7#JRymjz9BQ5-vESs#12%Jts-kCl1gQA=C%E4>+@jU0FDZs@CC(2yG<1g7 z%g+W9k7fi&s2lBR9YRfu14+kEhh*=5{iB7xQ*={ZO&S?8{Z6(;s_87qge?R0N)at! zVnH6x6`TM(ayoQ|SL@It4F4;K;~y?5BE_TGn+LN37Z-Pdlvy|{ne+KM@1KKov=!t# z6Z>=NkOwJ%HF8xlCA{?@*r|S{HvZ)G9BKLYS?D8+tgSTckt?mr-FaV4j?Bfi-r!M( zcfb00cQF%9e|i6(*5%G^P5`rABJZRVs)2$&20x}gWO;l}=H}|<`zxo_CpTz$P{V_X zXKsNcEnp8)D6ve!A>&qGW5pRN)51qU*!YM#@Ha#fOs=Hfesa>}JVy^G8G7n;>t+3c zLHUdSl<4;J8?wxclH;Jflk(vEr^{0@>cs{xF&8=yJ-^7*P}c`If)3YY6)5C1x(B=a zsQ9NuMsnpYC6HPg|~fom~;$<$If z1Wt9{)<}IFMZ`(gfk|Q^UYT7b>+u3Kr(X0XObZT{8fvU3f6_PcDEMug8MT?NV7-cCDlPp;>(~1aL z#vb>RC1j=uS!Vo?Q_tt~fBk#$dp^&Lr&k@WbB^OUm+QQa^SHjp_e#4hndz^V2LK)A ziUm$(4`lS3(`o!U4v(00qNSjcQxVWYlHY?-0;-_ssZVpMw8wk=^T-JrL`S74%_abE z7u8~JUO%yaP;iIe4`1FZ?e(b6dIgr#k8-4GJr^49^lttIZ^-&S^JmKTp}I8Fc(#x& z;4!yk@xj;s0F{PwRiyFc090Ri z1HH!RsT4tsF`X@B;Emk}5TDb-jK&|{f>=G|&i5;4<1w|gcqz;SkVW~)EjTQBy~+CU zX{&3$rOs}+w&ifWAjK?R4r-bn4q~0gfFr6nHfQ@>y6PkW}Fo^B9f~H%{-32jEWoR0k<08PwMOx<-G_h!amJdW) z0^3VoCNxcjrbbu69Xm;AYLnq2;QJQosRMT$0bJnQh8z$ASDzk{6kL`lpLc3vrHU$R z!c`2w#?%pTE&=Ls9B+Uc*R;45ls^?9gxLT+ZcqfJ8qojFBCxNf1WH?vfHaAur${o9 z6l4s6K}t9HmXih`l4D0xF_z7vL2Mb4b<7G$;At}PgJ^hemWDw1(}Q&aYBz!DAtxOS zMoWN+d_b?eUPICbBw1g>8rFh&&%Gd3H5XyYBp9jAv(SqmnCJ!#xR##;j&9uvc&}9W z9oqzSHi6Dzq{RkF=*(T73HGwVGrNqn0?FE-WV5v&rk3R*pwv4dpKY9&g;J}Mki#h>T_gO%% 
z3c|IVJ8-Q8YB=qL;F=>Hq-p32V>MhW-9;3}wm@(cz6XYc;SWxA+JWtH7?OdnV2_~* zQ<;Ca1s5)hln%oJp{#(B?FKX9Lzf72aLLu<_A%1?`3Fu8%B1Z7JxvMyU^55 z8*HFLHP#=n&?rNwsS`Af76o^%!<0}wU@!+1K##8iG<8HxkAegGV9+LOt-yc+_|~9~ zE+xSW1xHv|lyu~2H>yt9oQ-EGqw$jlPd)!K5f8V|0-Y78`~L#-u9u<1qfl?ACIo}q zY79z$!u|1EGDH$`^aBiO>25Ib+yEpK_+6vf{zKCoERX^)8rK*LN56)d2I! zPB2jFh9D7;M1?N7MuKbVu#l5wV44~fz_lYVgKEy;sOx>Wb^toxAP@7Y4mT1*0(1`F z1DzK@3qy<-cO_o(1iWWKf4&8SAZ2nhg0L1UyEvSeEx2v)TTAF;!-EsQN-DL5>v)=2!%9>|#6!Fo7&T_(?OY{waex0Zi$EivT9R(=5CTrYJlC z7B~eKF=a1wfKmbt8H8-SS|1o<*gyvur{G#Jx`ygN>^5fO870s;iY(lTl;dE44I>?X z8y4>J9uln!Bj%|A$zwLGT`6hkMyE22irv0!yzd$G!{?A+U|j=JsC!1>j8O!X$K5;V zE{HZ1Oo{|eJ2!yE(_nGnEUbxeS-6;=9=8JSN6;D#Q_a{7TK9K@tG;g)%)*C|);!cS z^N71(73!j66lgU{Xu%D`rh%32fWekJZX%v+i`@ADHzS37G?k?T`y0y!GSjTYOepcm zhJLs|SSt{&V&pFP9QGdAQGkV`R3{s6iWZ)!EZEP9Xu;*&2BkSEyK%pze}UwR5Dlip zY`o`TFp<0w2H#W#q>V`01D#5CFcAlrr7*QTWZqT5Y`_>zlQP_slpF;!%5&6z-IP-U!WXrw&rOLcwgmA8I;`2w&D+tRp5BL-wh_ z2~}_|6l)k;hQOOj4Yn##nDn|AVE|JMqNiFd^!iE^DX04&7vu$)RIk)!2<>Izt;xUU zP=({(H;{Pi4d8z6d#UR?pbzQ8{_XUpyKZ!9sX!~LQ{HQE?(1C*C*YbNh+vsJoM zLRu3+aBA6P;2=TJj3F3AKd;8`H4+oT2R!Nml<1}_(?3Q~iBHd3ZQ#=ZCXR!%^Nrfv z6DMfweG@eoRJ_ZF5?7i&l40t)*<+M9#K@Ne5o;=Pejpw%F`;sG{zsp%=V@W{qY=Fu zjZY0AWP0~t^I>lk!kEq#=mJct=Bq5^TGh78{zB%c);Ri(b+e zYiPYHm8I%YQT&;g6J9>!?0XXp{pPx4?6WhCV}~%e*YlatMx~kyX+tC2r>f)R@98eC zlK!E)eb_zDC)vDA8Q$Zb98%4J*4eU&%6^{rJ})t`E-rKK9@Cv0+)-IE@lmVjz+5PC z!MaASXa2TN=;K7{q z6#X{sqZ1iVv@7sgliEp`N{bXfTATBAm?Rq9YPL@$s79NanNpBw+v*ZzZ!TDdi@KXS z-!ap6F{m-LD?J`N7IkR&qx7#hZc(EteQxQ$RQV5k9`OTQQyXl@3!@J&)W{Z&1ErCL zy^%CMIE~8Lm)2AfePE_@uP&!6UB@lGyQ%iPdCH;EJv~S7T>Ab>xRSeLsw22ab>~7Y zpI}!&cbYhqn3h#%S3SP;ZH&r3TBF5X(5uYf*FQU3S*p$4G?%tHX?+`iW+aauG+aNa z-gK$Dn+s9pj~HG<@p97dOUme&TM~25)0XYj+Ri0M*ZJw4rcgh(!Tz`-@Xg)*kB^E^ zYSD+x=YBPg=%{~q^?~?U3#x>=0SVpl(jp@vT=DYXVN7#i3gk>xS@;Ibbh!m`;dbhM z*MaX8d(lx$UHVDqw*o|maoppI^7!yQAC7K+iHGf`3^(nu0-rOfp7ob1Let&F7XhsP zJcPFRqb9H?#-O4|#6M80C&kFiO>i_{uc@S0SxNe&D<4;2Zy|jr`Pem4dn1Q@!^9L! 
zV|*TplY6}!qm4>dZtiz!nxEnUaQjm2BC+4CT8jD2jfyIiRlIElr%b16W$zTDEM!O; zuddo=AT501gHoYq-ch|Kshd(w6<3_~n2X+qd@kWv!9=6Zw3BnGOZnWSlXQLcH!{B? z4MO@pE+vjQSKb~bS7)i$y4WxlsKQ+!g)g3LKcClMVtQ}Fau^;#aQ{bV{JG~JM7_32 z^pQJH((V2yr>i54b2`Fr_k1v*{i>773-Tt#WT*z7s^5Rslw~hqLx{YAb1gr8-$<|p zcONfB)_d$+fDfcq;C$_Eh=hWQu_y1iMb#RWTnoDU0zR@KDMK?bMSPI>>8OBzqEle6 zh2~KFyP)~NpB~y5Oy7^+Db}ym8sFTuS2aQTtB>Q!jV7R3kT=9-;P()#^u zLqgZC?~s4>;#P*1etzXrwgdIdyopBsky1^AOZ@aApCI}d+ovnv_c#UeYe=zHQE73b zMOBLpr-?mwDkQlHw=fy2?ieO-o;tBbmAMqM(sL_3%=WL2lX{#e)ZVf!u%-2KKO%=1 zHZxi8ryQqdTxSq%5dE%dmK>wa!8;_yQM4|f6spGSh@U<1v-wcph2f98<239P&z@}D z|Lf_%f$?$qUAz1CkH*Y?$tcuqT0FihwgcasXCKmMaMC>DYj2cHA#<$HHjhc~O2<92 zx8c0C9l!jn7tbvtBwt+n{{7I}Y5NH7xt*V!*$zfxwN>S<&O`!lcPx}{UhutTThw>F zvAL|f@z3XU`^GF@zE$F)MZ+_TQwBi-Qq*~PTx6crId@QSux+%dUetpWljZWSqwD4J zSQT4O<5R`YCbV^otPhD`6cZk67xCO}G9%i@l>gGpQ!dl`oX7PO4!M7;_TupyqA9v} zDk{@?Br(~j2v4`1`Y*nLxk-++Qq`vdiZM>!%+}o*dij4d0TX|5#EvaJ_pEsMt2|{= z>*J)n<_XI*LxYILlkzK(=Wxuim)Xj#1eTxOrNp~~2h(X9RAbo=DaW!>U zPq*rENRHBat~4t_A*X!s+S#_rN%AD?*^U+n#u`5-?p+GV(DWI7~PzpCH) z@RFM!yc&@%>htL9C;2H)XZE9(I3C@BhS)1d2N#e85T>gf&kzw&y(4fFQ6nYkqZdn-Pwsv&;#i_B0z0 zp%O(4MKkfqIW0I81of>=TZLIaOy$}(J7vn90EEy@m(@m&z`Ym(;HUe$j>b%cqO#ZP zh60$W2nu#gyh?umbeX*5hS$G-OCc!O*`S^J+eiS!xS0B&ae^Ggap7HvMnv6JoeVw0 zSfQ2>YJr6M2@-<8x%LMCzP5;5)np|8*2zRVrR>byBOpf7hX-HsA@Z_D>>vcY8$)0Z z?^&Z-)r@LYW?*~N$Z>Souy~I)Tz{%~)wmP)Ttm9Fc0e%@;^n1L zXS(`4FuNNN!jVnl0D&)a!c>g#qBgCr_d#zpHRSZnBJA{@K4DY$wmg@mG2Q=&&uWG; zEt zft8%NxMBeD0<9$+^{rPaD+WHk0D-pCjUi}(D}|<;Y5Z4c0>(9Si4M`Oc{8; z#pfG@Y}fhT4tPr@hPL5Zs=z`XVOjj8~MG2iU}d!Kop_thaghGkz3{K8y?Ls z6fOy44f|k6mbh{SNC2M+OyxRkbTQnpY;&TY$qdou19`9-UD5umAr4JH{M!r;!N30$ zIrL?q^PtoR!MyZ9n9z#YFLu2ch&WVDL#RT4V1?EG<`gp0oBxEeW-4Jb*{alBDqyTU zfiZS2ZX9v?bPMbKdNuPvgnIV-T)ir@*PF?k2>^bNW&GMYELj9*mLWux^zHE~^Dh)< z$>W=o3|~!mIP0zAh^5hPli4I&oX6W1Sm z2-Qjza>UhQU)>h0IF|h*)M0YXjTF_M^ih?buEV zC&)qRG+|wlNc*1;KPW!A@{XW4>69QdNc|v9SPj)iaMH%0Dz+J)uKMz4%ey8;QjGQ_ zuX6e?^VNh%dfN^PQL!#&WNk_EWcUx?8+Z<4XYAQN)e*U_U2%J++Ms`_cHPx;qcIXf 
zUCeU_Vw>)Yj>;ROR3y?HS+Rw#{r!tr1g^JyO3z%vRME^X561;`VsK09b^|JBe)hU| z!^JNX$;J7eJ|$LPEu2YF-Tj1(yQu&`D2HCd2_DppZBPsB64$vS13% zbLt=Xj2fOmyv8RBOfOgF^2az#H|i$tM?&pmg1*?Q_Du$2LR~XXl>sV^_q;wX?sVDA zwf<;50N(dnR>Ha1J+OiuZAmL0@!xq zIi2UJ{m)hLKl2U!t2((&8LG0{SwzEY_9sllL_H>AVs8V#TXg_b)t8`c{GLyBIgsvM z^j(GJ*IAJw&hcUlU;U#+iB|yXB$6Z~n;*b?t0;rbeL$FX7xsCOx79XBO8OnG7l4v3 zIIf4UJN{-Tka8jp=>G&M{eamzET}t((>XQr5oNFLOnWo+F5f^+4h>p-9jEbh)YJ+ET!o_$ZwR z$`Nlm+UMU5aX!DSbo7q4rdCUIC3>EoQ7>!)Hr7&!u4>N_yJ*Mgegg1g&Z4I$+aq_n z#;V4xd|GuHUPsUks{11+gu`Va&5q@mN0j06@vLQXgVSUnHnFW30r-P^-L~^p-$<=0 zWk9!_qI#zqr63lRFVFu&V91akZeJ*uNa3P-SFv^VL`9n&J+WDbJ$p)@J~a{?M(2T$ z!Or_Y$<#MKBseP7568o2URSf(^cn{?vyEGSDGdk`xHrO<{yP_zdv{^;mm!E?-PSGfX>>Bh`Bt*vfZL z{Ho+@lq)ekItt!VIfZ)T1ruX=^wVB7u1{%JM zh-TuP&Q$vddLRDjseFxmU3Br|+Q6W}DO{__fuELT`JsC5Kwml%%Z=;JomS0eI;F_B z+In33p^>mH_e-4DR;Uei9(v-M zE^jPZ0_Ek7i{T0h7?->)eLyvkCw@k5RfF_`gSlWOF398Q8*ZVz0F8Z@rN*_jiV!WI zy6=^*-GtXrr)4$ri|TicbK;HSR@hq%lVXB_*j9O_UbL&m^U%!B`f=gXu35s~C%Srp zb>h7?bOHa7mk!Q@$!*dzp&b@y3E>5%4H7MvPuk?*~X}te9xC+#WpoJ@iXH43pDIwC>kG!@`m1a z07r>8zq_=pYPPH_wY26Oq?aDWiGHFvsqA`k{Iwr(Ly1#@LL{!17bZPcnSiVLGVw$# z^4!kvTz(&O{N!x?s+Ju)dQPYS^$IC0--TCF{f(AroxPsNCA0tXo{q5*?z$ef=9+S$ z3Hiw)u)$I_~4hG|W0v9X1l#US^PMBgR{=EI?6$mmT4`RrE+~k-hB2$(g+}agO6GJ?Y{M1Y+d*h4%h}(4-)CH}?<}2JyNI2ugA# z{Kx(|)^F4s_h*Kry*1U6XgFW@%#j=?*Ro?<{gcM67Zw+Y1WmcF(r+WbW{E}Ilzlyk zf9&7wG=D)sGZE-~;ilOYW#uf8dS zGYg-Cj~e_pb0MN|Jc~HaXBPp9q(RX|V8_0xnTeh)fQi8v$;zF;+qq)cnz$wq4hI+_ z*+Xz$|E7>xju(RJvO-Dt*HAi9(E*fOP^$^MMTKh(Vtq+cxCA`rThyG#xVlk?U(SQa)Z51MCQ6=*00Fb4MkZFX)!IJ^h}Hn6G!F`erbU zFvQu(+CGoJV*JQ0*G0mVb&>25f!La#-GSm{ED=t2EW-y?? 
z6GDMXry|&mqD3(9g(q{_Y}LpC#2)V;hB0=)I3Sqy1Nel||F7@*-?n1F#sAc?^WRyx z|L4a4eGTg0S%<%9-Oo;=F9C%c@Y$6kmYd=Kx0aJAN0vVcB)j~Xt=Q!|uy*-a14keD i*2M4I50ZHQlg-7gPOdYEx|C3Y%#se98x|a-MEpONf+$n~ literal 0 HcmV?d00001 diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 8f1ecfd811..6e32d57655 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -51,7 +51,7 @@ const sidebars = { { type: "category", label: "Architecture", - items: ["proxy/architecture", "proxy/db_info", "router_architecture"], + items: ["proxy/architecture", "proxy/db_info", "router_architecture", "proxy/user_management_heirarchy"], }, { type: "link", @@ -99,8 +99,13 @@ const sidebars = { }, { type: "category", - label: "Spend Tracking + Budgets", - items: ["proxy/cost_tracking", "proxy/users", "proxy/custom_pricing", "proxy/team_budgets", "proxy/billing", "proxy/customers"], + label: "Spend Tracking", + items: ["proxy/cost_tracking", "proxy/custom_pricing", "proxy/billing",], + }, + { + type: "category", + label: "Budgets + Rate Limits", + items: ["proxy/users", "proxy/rate_limit_tiers", "proxy/team_budgets", "proxy/customers"], }, { type: "link", @@ -135,9 +140,17 @@ const sidebars = { "oidc" ] }, + { + type: "category", + label: "Create Custom Plugins", + description: "Modify requests, responses, and more", + items: [ + "proxy/call_hooks", + "proxy/rules", + ] + }, "proxy/caching", - "proxy/call_hooks", - "proxy/rules", + ] }, { diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index e7e4a8cdb2..5454c5fcb0 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -633,8 +633,12 @@ class PrometheusLogger(CustomLogger): ) remaining_tokens_variable_name = f"litellm-key-remaining-tokens-{model_group}" - remaining_requests = metadata.get(remaining_requests_variable_name, sys.maxsize) - remaining_tokens = metadata.get(remaining_tokens_variable_name, sys.maxsize) + remaining_requests = ( + 
metadata.get(remaining_requests_variable_name, sys.maxsize) or sys.maxsize + ) + remaining_tokens = ( + metadata.get(remaining_tokens_variable_name, sys.maxsize) or sys.maxsize + ) self.litellm_remaining_api_key_requests_for_model.labels( user_api_key, user_api_key_alias, model_group diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index 277c31acc6..a226462ff7 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -12,6 +12,7 @@ from litellm.types.integrations.slack_alerting import AlertType from litellm.types.router import RouterErrors, UpdateRouterConfig from litellm.types.utils import ( EmbeddingResponse, + GenericBudgetConfigType, ImageResponse, LiteLLMPydanticObjectBase, ModelResponse, @@ -614,7 +615,6 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase): rpm_limit: Optional[int] = None budget_duration: Optional[str] = None allowed_cache_controls: Optional[list] = [] - soft_budget: Optional[float] = None config: Optional[dict] = {} permissions: Optional[dict] = {} model_max_budget: Optional[dict] = ( @@ -622,7 +622,6 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase): ) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {} model_config = ConfigDict(protected_namespaces=()) - send_invite_email: Optional[bool] = None model_rpm_limit: Optional[dict] = None model_tpm_limit: Optional[dict] = None guardrails: Optional[List[str]] = None @@ -630,21 +629,25 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase): aliases: Optional[dict] = {} -class _GenerateKeyRequest(GenerateRequestBase): +class KeyRequestBase(GenerateRequestBase): key: Optional[str] = None - - -class GenerateKeyRequest(_GenerateKeyRequest): + budget_id: Optional[str] = None tags: Optional[List[str]] = None enforced_params: Optional[List[str]] = None -class GenerateKeyResponse(_GenerateKeyRequest): +class GenerateKeyRequest(KeyRequestBase): + soft_budget: Optional[float] = None + send_invite_email: Optional[bool] = None + + +class 
GenerateKeyResponse(KeyRequestBase): key: str # type: ignore key_name: Optional[str] = None expires: Optional[datetime] user_id: Optional[str] = None token_id: Optional[str] = None + litellm_budget_table: Optional[Any] = None @model_validator(mode="before") @classmethod @@ -669,7 +672,7 @@ class GenerateKeyResponse(_GenerateKeyRequest): return values -class UpdateKeyRequest(GenerateKeyRequest): +class UpdateKeyRequest(KeyRequestBase): # Note: the defaults of all Params here MUST BE NONE # else they will get overwritten key: str # type: ignore @@ -765,7 +768,7 @@ class DeleteUserRequest(LiteLLMPydanticObjectBase): AllowedModelRegion = Literal["eu", "us"] -class BudgetNew(LiteLLMPydanticObjectBase): +class BudgetNewRequest(LiteLLMPydanticObjectBase): budget_id: Optional[str] = Field(default=None, description="The unique budget id.") max_budget: Optional[float] = Field( default=None, @@ -788,6 +791,10 @@ class BudgetNew(LiteLLMPydanticObjectBase): default=None, description="Max duration budget should be set for (e.g. '1hr', '1d', '28d')", ) + model_max_budget: Optional[GenericBudgetConfigType] = Field( + default=None, + description="Max budget for each model (e.g. 
{'gpt-4o': {'max_budget': '0.0000001', 'budget_duration': '1d', 'tpm_limit': 1000, 'rpm_limit': 1000}})", + ) class BudgetRequest(LiteLLMPydanticObjectBase): @@ -805,11 +812,11 @@ class CustomerBase(LiteLLMPydanticObjectBase): allowed_model_region: Optional[AllowedModelRegion] = None default_model: Optional[str] = None budget_id: Optional[str] = None - litellm_budget_table: Optional[BudgetNew] = None + litellm_budget_table: Optional[BudgetNewRequest] = None blocked: bool = False -class NewCustomerRequest(BudgetNew): +class NewCustomerRequest(BudgetNewRequest): """ Create a new customer, allocate a budget to them """ @@ -1426,6 +1433,19 @@ class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken): # Time stamps last_refreshed_at: Optional[float] = None # last time joint view was pulled from db + def __init__(self, **kwargs): + # Handle litellm_budget_table_* keys + for key, value in list(kwargs.items()): + if key.startswith("litellm_budget_table_") and value is not None: + # Extract the corresponding attribute name + attr_name = key.replace("litellm_budget_table_", "") + # Check if the value is None and set the corresponding attribute + if getattr(self, attr_name, None) is None: + kwargs[attr_name] = value + + # Initialize the superclass + super().__init__(**kwargs) + class UserAPIKeyAuth( LiteLLM_VerificationTokenView @@ -2194,9 +2214,9 @@ class ProviderBudgetResponseObject(LiteLLMPydanticObjectBase): Configuration for a single provider's budget settings """ - budget_limit: float # Budget limit in USD for the time period - time_period: str # Time period for budget (e.g., '1d', '30d', '1mo') - spend: float = 0.0 # Current spend for this provider + budget_limit: Optional[float] # Budget limit in USD for the time period + time_period: Optional[str] # Time period for budget (e.g., '1d', '30d', '1mo') + spend: Optional[float] = 0.0 # Current spend for this provider budget_reset_at: Optional[str] = None # When the current budget period resets diff --git 
a/litellm/proxy/auth/auth_utils.py b/litellm/proxy/auth/auth_utils.py index f73e045075..c1091d500f 100644 --- a/litellm/proxy/auth/auth_utils.py +++ b/litellm/proxy/auth/auth_utils.py @@ -418,6 +418,12 @@ def get_key_model_rpm_limit(user_api_key_dict: UserAPIKeyAuth) -> Optional[dict] if user_api_key_dict.metadata: if "model_rpm_limit" in user_api_key_dict.metadata: return user_api_key_dict.metadata["model_rpm_limit"] + elif user_api_key_dict.model_max_budget: + model_rpm_limit: Dict[str, Any] = {} + for model, budget in user_api_key_dict.model_max_budget.items(): + if "rpm_limit" in budget and budget["rpm_limit"] is not None: + model_rpm_limit[model] = budget["rpm_limit"] + return model_rpm_limit return None @@ -426,6 +432,9 @@ def get_key_model_tpm_limit(user_api_key_dict: UserAPIKeyAuth) -> Optional[dict] if user_api_key_dict.metadata: if "model_tpm_limit" in user_api_key_dict.metadata: return user_api_key_dict.metadata["model_tpm_limit"] + elif user_api_key_dict.model_max_budget: + if "tpm_limit" in user_api_key_dict.model_max_budget: + return user_api_key_dict.model_max_budget["tpm_limit"] return None diff --git a/litellm/proxy/hooks/model_max_budget_limiter.py b/litellm/proxy/hooks/model_max_budget_limiter.py index 5d5e56e014..3befca8516 100644 --- a/litellm/proxy/hooks/model_max_budget_limiter.py +++ b/litellm/proxy/hooks/model_max_budget_limiter.py @@ -9,8 +9,8 @@ from litellm.proxy._types import UserAPIKeyAuth from litellm.router_strategy.budget_limiter import RouterBudgetLimiting from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import ( + BudgetConfig, GenericBudgetConfigType, - GenericBudgetInfo, StandardLoggingPayload, ) @@ -42,12 +42,8 @@ class _PROXY_VirtualKeyModelMaxBudgetLimiter(RouterBudgetLimiting): _model_max_budget = user_api_key_dict.model_max_budget internal_model_max_budget: GenericBudgetConfigType = {} - # case each element in _model_max_budget to GenericBudgetInfo for _model, _budget_info in 
_model_max_budget.items(): - internal_model_max_budget[_model] = GenericBudgetInfo( - time_period=_budget_info.get("time_period"), - budget_limit=float(_budget_info.get("budget_limit")), - ) + internal_model_max_budget[_model] = BudgetConfig(**_budget_info) verbose_proxy_logger.debug( "internal_model_max_budget %s", @@ -65,7 +61,10 @@ class _PROXY_VirtualKeyModelMaxBudgetLimiter(RouterBudgetLimiting): return True # check if current model is within budget - if _current_model_budget_info.budget_limit > 0: + if ( + _current_model_budget_info.max_budget + and _current_model_budget_info.max_budget > 0 + ): _current_spend = await self._get_virtual_key_spend_for_model( user_api_key_hash=user_api_key_dict.token, model=model, @@ -73,12 +72,13 @@ class _PROXY_VirtualKeyModelMaxBudgetLimiter(RouterBudgetLimiting): ) if ( _current_spend is not None - and _current_spend > _current_model_budget_info.budget_limit + and _current_model_budget_info.max_budget is not None + and _current_spend > _current_model_budget_info.max_budget ): raise litellm.BudgetExceededError( message=f"LiteLLM Virtual Key: {user_api_key_dict.token}, key_alias: {user_api_key_dict.key_alias}, exceeded budget for model={model}", current_cost=_current_spend, - max_budget=_current_model_budget_info.budget_limit, + max_budget=_current_model_budget_info.max_budget, ) return True @@ -87,7 +87,7 @@ class _PROXY_VirtualKeyModelMaxBudgetLimiter(RouterBudgetLimiting): self, user_api_key_hash: Optional[str], model: str, - key_budget_config: GenericBudgetInfo, + key_budget_config: BudgetConfig, ) -> Optional[float]: """ Get the current spend for a virtual key for a model @@ -98,7 +98,7 @@ class _PROXY_VirtualKeyModelMaxBudgetLimiter(RouterBudgetLimiting): """ # 1. 
model: directly look up `model` - virtual_key_model_spend_cache_key = f"{VIRTUAL_KEY_SPEND_CACHE_KEY_PREFIX}:{user_api_key_hash}:{model}:{key_budget_config.time_period}" + virtual_key_model_spend_cache_key = f"{VIRTUAL_KEY_SPEND_CACHE_KEY_PREFIX}:{user_api_key_hash}:{model}:{key_budget_config.budget_duration}" _current_spend = await self.dual_cache.async_get_cache( key=virtual_key_model_spend_cache_key, ) @@ -106,7 +106,7 @@ class _PROXY_VirtualKeyModelMaxBudgetLimiter(RouterBudgetLimiting): if _current_spend is None: # 2. If 1, does not exist, check if passed as {custom_llm_provider}/model # if "/" in model, remove first part before "/" - eg. openai/o1-preview -> o1-preview - virtual_key_model_spend_cache_key = f"{VIRTUAL_KEY_SPEND_CACHE_KEY_PREFIX}:{user_api_key_hash}:{self._get_model_without_custom_llm_provider(model)}:{key_budget_config.time_period}" + virtual_key_model_spend_cache_key = f"{VIRTUAL_KEY_SPEND_CACHE_KEY_PREFIX}:{user_api_key_hash}:{self._get_model_without_custom_llm_provider(model)}:{key_budget_config.budget_duration}" _current_spend = await self.dual_cache.async_get_cache( key=virtual_key_model_spend_cache_key, ) @@ -114,7 +114,7 @@ class _PROXY_VirtualKeyModelMaxBudgetLimiter(RouterBudgetLimiting): def _get_request_model_budget_config( self, model: str, internal_model_max_budget: GenericBudgetConfigType - ) -> Optional[GenericBudgetInfo]: + ) -> Optional[BudgetConfig]: """ Get the budget config for the request model @@ -175,8 +175,8 @@ class _PROXY_VirtualKeyModelMaxBudgetLimiter(RouterBudgetLimiting): virtual_key = standard_logging_payload.get("metadata").get("user_api_key_hash") model = standard_logging_payload.get("model") if virtual_key is not None: - budget_config = GenericBudgetInfo(time_period="1d", budget_limit=0.1) - virtual_spend_key = f"{VIRTUAL_KEY_SPEND_CACHE_KEY_PREFIX}:{virtual_key}:{model}:{budget_config.time_period}" + budget_config = BudgetConfig(time_period="1d", budget_limit=0.1) + virtual_spend_key = 
f"{VIRTUAL_KEY_SPEND_CACHE_KEY_PREFIX}:{virtual_key}:{model}:{budget_config.budget_duration}" virtual_start_time_key = f"virtual_key_budget_start_time:{virtual_key}" await self._increment_spend_for_key( budget_config=budget_config, diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py index b1a2716a4e..656e2f8804 100644 --- a/litellm/proxy/hooks/parallel_request_limiter.py +++ b/litellm/proxy/hooks/parallel_request_limiter.py @@ -317,7 +317,6 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): _tpm_limit_for_key_model = get_key_model_tpm_limit(user_api_key_dict) _rpm_limit_for_key_model = get_key_model_rpm_limit(user_api_key_dict) - if _model is not None: if _tpm_limit_for_key_model: @@ -325,6 +324,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): if _rpm_limit_for_key_model: rpm_limit_for_model = _rpm_limit_for_key_model.get(_model) + if current is None: new_val = { "current_requests": 1, @@ -485,6 +485,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): ) try: self.print_verbose("INSIDE parallel request limiter ASYNC SUCCESS LOGGING") + global_max_parallel_requests = kwargs["litellm_params"]["metadata"].get( "global_max_parallel_requests", None ) @@ -495,6 +496,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): user_api_key_team_id = kwargs["litellm_params"]["metadata"].get( "user_api_key_team_id", None ) + user_api_key_model_max_budget = kwargs["litellm_params"]["metadata"].get( + "user_api_key_model_max_budget", None + ) user_api_key_end_user_id = kwargs.get("user") user_api_key_metadata = ( @@ -568,6 +572,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): and ( "model_rpm_limit" in user_api_key_metadata or "model_tpm_limit" in user_api_key_metadata + or user_api_key_model_max_budget is not None ) ): request_count_api_key = ( diff --git a/litellm/proxy/management_endpoints/budget_management_endpoints.py 
b/litellm/proxy/management_endpoints/budget_management_endpoints.py new file mode 100644 index 0000000000..20aa1c6bbf --- /dev/null +++ b/litellm/proxy/management_endpoints/budget_management_endpoints.py @@ -0,0 +1,287 @@ +""" +BUDGET MANAGEMENT + +All /budget management endpoints + +/budget/new +/budget/info +/budget/update +/budget/delete +/budget/settings +/budget/list +""" + +#### BUDGET TABLE MANAGEMENT #### +from fastapi import APIRouter, Depends, HTTPException + +from litellm.proxy._types import * +from litellm.proxy.auth.user_api_key_auth import user_api_key_auth +from litellm.proxy.utils import jsonify_object + +router = APIRouter() + + +@router.post( + "/budget/new", + tags=["budget management"], + dependencies=[Depends(user_api_key_auth)], +) +async def new_budget( + budget_obj: BudgetNewRequest, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Create a new budget object. Can apply this to teams, orgs, end-users, keys. + + Parameters: + - budget_duration: Optional[str] - Budget reset period ("30d", "1h", etc.) + - budget_id: Optional[str] - The id of the budget. If not provided, a new id will be generated. + - max_budget: Optional[float] - The max budget for the budget. + - soft_budget: Optional[float] - The soft budget for the budget. + - max_parallel_requests: Optional[int] - The max number of parallel requests for the budget. + - tpm_limit: Optional[int] - The tokens per minute limit for the budget. + - rpm_limit: Optional[int] - The requests per minute limit for the budget. + - model_max_budget: Optional[dict] - Specify max budget for a given model. 
Example: {"openai/gpt-4o-mini": {"max_budget": 100.0, "budget_duration": "1d", "tpm_limit": 100000, "rpm_limit": 100000}} + """ + from litellm.proxy.proxy_server import litellm_proxy_admin_name, prisma_client + + if prisma_client is None: + raise HTTPException( + status_code=500, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + + budget_obj_json = budget_obj.model_dump(exclude_none=True) + budget_obj_jsonified = jsonify_object(budget_obj_json) # json dump any dictionaries + response = await prisma_client.db.litellm_budgettable.create( + data={ + **budget_obj_jsonified, # type: ignore + "created_by": user_api_key_dict.user_id or litellm_proxy_admin_name, + "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name, + } # type: ignore + ) + + return response + + +@router.post( + "/budget/update", + tags=["budget management"], + dependencies=[Depends(user_api_key_auth)], +) +async def update_budget( + budget_obj: BudgetNewRequest, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Update an existing budget object. + + Parameters: + - budget_duration: Optional[str] - Budget reset period ("30d", "1h", etc.) + - budget_id: Optional[str] - The id of the budget to update. Required for this endpoint; the request is rejected with a 400 error if it is not provided. + - max_budget: Optional[float] - The max budget for the budget. + - soft_budget: Optional[float] - The soft budget for the budget. + - max_parallel_requests: Optional[int] - The max number of parallel requests for the budget. + - tpm_limit: Optional[int] - The tokens per minute limit for the budget. + - rpm_limit: Optional[int] - The requests per minute limit for the budget. + - model_max_budget: Optional[dict] - Specify max budget for a given model.
Example: {"openai/gpt-4o-mini": {"max_budget": 100.0, "budget_duration": "1d", "tpm_limit": 100000, "rpm_limit": 100000}} + """ + from litellm.proxy.proxy_server import litellm_proxy_admin_name, prisma_client + + if prisma_client is None: + raise HTTPException( + status_code=500, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + if budget_obj.budget_id is None: + raise HTTPException(status_code=400, detail={"error": "budget_id is required"}) + + response = await prisma_client.db.litellm_budgettable.update( + where={"budget_id": budget_obj.budget_id}, + data={ + **budget_obj.model_dump(exclude_none=True), # type: ignore + "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name, + }, # type: ignore + ) + + return response + + +@router.post( + "/budget/info", + tags=["budget management"], + dependencies=[Depends(user_api_key_auth)], +) +async def info_budget(data: BudgetRequest): + """ + Get the budget id specific information + + Parameters: + - budgets: List[str] - The list of budget ids to get information for + """ + from litellm.proxy.proxy_server import prisma_client + + if prisma_client is None: + raise HTTPException(status_code=500, detail={"error": "No db connected"}) + + if len(data.budgets) == 0: + raise HTTPException( + status_code=400, + detail={ + "error": f"Specify list of budget id's to query. Passed in={data.budgets}" + }, + ) + response = await prisma_client.db.litellm_budgettable.find_many( + where={"budget_id": {"in": data.budgets}}, + ) + + return response + + +@router.get( + "/budget/settings", + tags=["budget management"], + dependencies=[Depends(user_api_key_auth)], +) +async def budget_settings( + budget_id: str, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Get list of configurable params + current value for a budget item + description of each field + + Used on Admin UI. 
+ + Query Parameters: + - budget_id: str - The budget id to get information for + """ + from litellm.proxy.proxy_server import prisma_client + + if prisma_client is None: + raise HTTPException( + status_code=400, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + + if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN: + raise HTTPException( + status_code=400, + detail={ + "error": "{}, your role={}".format( + CommonProxyErrors.not_allowed_access.value, + user_api_key_dict.user_role, + ) + }, + ) + + ## get budget item from db + db_budget_row = await prisma_client.db.litellm_budgettable.find_first( + where={"budget_id": budget_id} + ) + + if db_budget_row is not None: + db_budget_row_dict = db_budget_row.model_dump(exclude_none=True) + else: + db_budget_row_dict = {} + + allowed_args = { + "max_parallel_requests": {"type": "Integer"}, + "tpm_limit": {"type": "Integer"}, + "rpm_limit": {"type": "Integer"}, + "budget_duration": {"type": "String"}, + "max_budget": {"type": "Float"}, + "soft_budget": {"type": "Float"}, + } + + return_val = [] + + for field_name, field_info in BudgetNewRequest.model_fields.items(): + if field_name in allowed_args: + + _stored_in_db = True + + _response_obj = ConfigList( + field_name=field_name, + field_type=allowed_args[field_name]["type"], + field_description=field_info.description or "", + field_value=db_budget_row_dict.get(field_name, None), + stored_in_db=_stored_in_db, + field_default_value=field_info.default, + ) + return_val.append(_response_obj) + + return return_val + + +@router.get( + "/budget/list", + tags=["budget management"], + dependencies=[Depends(user_api_key_auth)], +) +async def list_budget( + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """List all the created budgets in proxy db. 
Used on Admin UI.""" + from litellm.proxy.proxy_server import prisma_client + + if prisma_client is None: + raise HTTPException( + status_code=400, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + + if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN: + raise HTTPException( + status_code=400, + detail={ + "error": "{}, your role={}".format( + CommonProxyErrors.not_allowed_access.value, + user_api_key_dict.user_role, + ) + }, + ) + + response = await prisma_client.db.litellm_budgettable.find_many() + + return response + + +@router.post( + "/budget/delete", + tags=["budget management"], + dependencies=[Depends(user_api_key_auth)], +) +async def delete_budget( + data: BudgetDeleteRequest, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Delete budget + + Parameters: + - id: str - The budget id to delete + """ + from litellm.proxy.proxy_server import prisma_client + + if prisma_client is None: + raise HTTPException( + status_code=500, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + + if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN: + raise HTTPException( + status_code=400, + detail={ + "error": "{}, your role={}".format( + CommonProxyErrors.not_allowed_access.value, + user_api_key_dict.user_role, + ) + }, + ) + + response = await prisma_client.db.litellm_budgettable.delete( + where={"budget_id": data.id} + ) + + return response diff --git a/litellm/proxy/management_endpoints/customer_endpoints.py b/litellm/proxy/management_endpoints/customer_endpoints.py index 47bc7f6165..976ff8581f 100644 --- a/litellm/proxy/management_endpoints/customer_endpoints.py +++ b/litellm/proxy/management_endpoints/customer_endpoints.py @@ -131,11 +131,11 @@ async def unblock_user(data: BlockUsers): return {"blocked_users": litellm.blocked_user_list} -def new_budget_request(data: NewCustomerRequest) -> Optional[BudgetNew]: +def new_budget_request(data: NewCustomerRequest) -> 
Optional[BudgetNewRequest]: """ Return a new budget object if new budget params are passed. """ - budget_params = BudgetNew.model_fields.keys() + budget_params = BudgetNewRequest.model_fields.keys() budget_kv_pairs = {} # Get the actual values from the data object using getattr @@ -147,7 +147,7 @@ def new_budget_request(data: NewCustomerRequest) -> Optional[BudgetNew]: budget_kv_pairs[field_name] = value if budget_kv_pairs: - return BudgetNew(**budget_kv_pairs) + return BudgetNewRequest(**budget_kv_pairs) return None @@ -182,6 +182,7 @@ async def new_end_user( - budget_duration: Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). - tpm_limit: Optional[int] - [Not Implemented Yet] Specify tpm limit for a given customer (Tokens per minute) - rpm_limit: Optional[int] - [Not Implemented Yet] Specify rpm limit for a given customer (Requests per minute) + - model_max_budget: Optional[dict] - [Not Implemented Yet] Specify max budget for a given model. Example: {"openai/gpt-4o-mini": {"max_budget": 100.0, "budget_duration": "1d"}} - max_parallel_requests: Optional[int] - [Not Implemented Yet] Specify max parallel requests for a given customer. - soft_budget: Optional[float] - [Not Implemented Yet] Get alerts when customer crosses given budget, doesn't block requests. 
@@ -271,7 +272,7 @@ async def new_end_user( _user_data = data.dict(exclude_none=True) for k, v in _user_data.items(): - if k not in BudgetNew.model_fields.keys(): + if k not in BudgetNewRequest.model_fields.keys(): new_end_user_obj[k] = v ## WRITE TO DB ## diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py index 402e8fbb89..caf48e4342 100644 --- a/litellm/proxy/management_endpoints/key_management_endpoints.py +++ b/litellm/proxy/management_endpoints/key_management_endpoints.py @@ -40,7 +40,7 @@ from litellm.proxy.utils import ( ) from litellm.secret_managers.main import get_secret from litellm.types.utils import ( - GenericBudgetInfo, + BudgetConfig, PersonalUIKeyGenerationConfig, TeamUIKeyGenerationConfig, ) @@ -238,6 +238,7 @@ async def generate_key_fn( # noqa: PLR0915 - key: Optional[str] - User defined key value. If not set, a 16-digit unique sk-key is created for you. - team_id: Optional[str] - The team id of the key - user_id: Optional[str] - The user id of the key + - budget_id: Optional[str] - The budget id associated with the key. Created by calling `/budget/new`. - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models) - aliases: Optional[dict] - Any alias mappings, on top of anything in the config.yaml model list. - https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---upgradedowngrade-models - config: Optional[dict] - any key-specific configs, overrides config in config.yaml @@ -249,7 +250,7 @@ async def generate_key_fn( # noqa: PLR0915 - metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" } - guardrails: Optional[List[str]] - List of active guardrails for the key - permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). 
Example - {"pii": false} - - model_max_budget: Optional[Dict[str, GenericBudgetInfo]] - Model-specific budgets {"gpt-4": {"budget_limit": 0.0005, "time_period": "30d"}}}. IF null or {} then no model specific budget. + - model_max_budget: Optional[Dict[str, BudgetConfig]] - Model-specific budgets {"gpt-4": {"budget_limit": 0.0005, "time_period": "30d"}}}. IF null or {} then no model specific budget. - model_rpm_limit: Optional[dict] - key-specific model rpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific rpm limit. - model_tpm_limit: Optional[dict] - key-specific model tpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific tpm limit. - allowed_cache_controls: Optional[list] - List of allowed cache control values. Example - ["no-cache", "no-store"]. See all values - https://docs.litellm.ai/docs/proxy/caching#turn-on--off-caching-per-request @@ -376,7 +377,7 @@ async def generate_key_fn( # noqa: PLR0915 ) # TODO: @ishaan-jaff: Migrate all budget tracking to use LiteLLM_BudgetTable - _budget_id = None + _budget_id = data.budget_id if prisma_client is not None and data.soft_budget is not None: # create the Budget Row for the LiteLLM Verification Token budget_row = LiteLLM_BudgetTable( @@ -547,14 +548,15 @@ async def update_key_fn( - key_alias: Optional[str] - User-friendly key alias - user_id: Optional[str] - User ID associated with key - team_id: Optional[str] - Team ID associated with key + - budget_id: Optional[str] - The budget id associated with the key. Created by calling `/budget/new`. - models: Optional[list] - Model_name's a user is allowed to call - tags: Optional[List[str]] - Tags for organizing keys (Enterprise only) - enforced_params: Optional[List[str]] - List of enforced params for the key (Enterprise only). 
[Docs](https://docs.litellm.ai/docs/proxy/enterprise#enforce-required-params-for-llm-requests) - spend: Optional[float] - Amount spent by key - max_budget: Optional[float] - Max budget for key - - model_max_budget: Optional[Dict[str, GenericBudgetInfo]] - Model-specific budgets {"gpt-4": {"budget_limit": 0.0005, "time_period": "30d"}} + - model_max_budget: Optional[Dict[str, BudgetConfig]] - Model-specific budgets {"gpt-4": {"budget_limit": 0.0005, "time_period": "30d"}} - budget_duration: Optional[str] - Budget reset period ("30d", "1h", etc.) - - soft_budget: Optional[float] - Soft budget limit (warning vs. hard stop). Will trigger a slack alert when this soft budget is reached. + - soft_budget: Optional[float] - [TODO] Soft budget limit (warning vs. hard stop). Will trigger a slack alert when this soft budget is reached. - max_parallel_requests: Optional[int] - Rate limit for parallel requests - metadata: Optional[dict] - Metadata for key. Example {"team": "core-infra", "app": "app2"} - tpm_limit: Optional[int] - Tokens per minute limit @@ -592,7 +594,7 @@ async def update_key_fn( ) try: - data_json: dict = data.model_dump(exclude_unset=True) + data_json: dict = data.model_dump(exclude_unset=True, exclude_none=True) key = data_json.pop("key") # get the row from db if prisma_client is None: @@ -1135,6 +1137,9 @@ async def generate_key_helper_fn( # noqa: PLR0915 data=key_data, table_name="key" ) key_data["token_id"] = getattr(create_key_response, "token", None) + key_data["litellm_budget_table"] = getattr( + create_key_response, "litellm_budget_table", None + ) except Exception as e: verbose_proxy_logger.error( "litellm.proxy.proxy_server.generate_key_helper_fn(): Exception occured - {}".format( @@ -1247,7 +1252,7 @@ async def regenerate_key_fn( - tags: Optional[List[str]] - Tags for organizing keys (Enterprise only) - spend: Optional[float] - Amount spent by key - max_budget: Optional[float] - Max budget for key - - model_max_budget: Optional[Dict[str, 
GenericBudgetInfo]] - Model-specific budgets {"gpt-4": {"budget_limit": 0.0005, "time_period": "30d"}} + - model_max_budget: Optional[Dict[str, BudgetConfig]] - Model-specific budgets {"gpt-4": {"budget_limit": 0.0005, "time_period": "30d"}} - budget_duration: Optional[str] - Budget reset period ("30d", "1h", etc.) - soft_budget: Optional[float] - Soft budget limit (warning vs. hard stop). Will trigger a slack alert when this soft budget is reached. - max_parallel_requests: Optional[int] - Rate limit for parallel requests @@ -1956,7 +1961,7 @@ def validate_model_max_budget(model_max_budget: Optional[Dict]) -> None: # /CRUD endpoints can pass budget_limit as a string, so we need to convert it to a float if "budget_limit" in _budget_info: _budget_info["budget_limit"] = float(_budget_info["budget_limit"]) - GenericBudgetInfo(**_budget_info) + BudgetConfig(**_budget_info) except Exception as e: raise ValueError( f"Invalid model_max_budget: {str(e)}. Example of valid model_max_budget: https://docs.litellm.ai/docs/proxy/users" diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index f65c9fe6a3..39c9cd5881 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -178,6 +178,9 @@ from litellm.proxy.hooks.prompt_injection_detection import ( from litellm.proxy.hooks.proxy_failure_handler import _PROXY_failure_handler from litellm.proxy.hooks.proxy_track_cost_callback import _PROXY_track_cost_callback from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request +from litellm.proxy.management_endpoints.budget_management_endpoints import ( + router as budget_management_router, +) from litellm.proxy.management_endpoints.customer_endpoints import ( router as customer_router, ) @@ -5531,238 +5534,6 @@ async def supported_openai_params(model: str): ) -#### BUDGET TABLE MANAGEMENT #### - - -@router.post( - "/budget/new", - tags=["budget management"], - dependencies=[Depends(user_api_key_auth)], -) -async def 
new_budget( - budget_obj: BudgetNew, - user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), -): - """ - Create a new budget object. Can apply this to teams, orgs, end-users, keys. - """ - global prisma_client - - if prisma_client is None: - raise HTTPException( - status_code=500, - detail={"error": CommonProxyErrors.db_not_connected_error.value}, - ) - - response = await prisma_client.db.litellm_budgettable.create( - data={ - **budget_obj.model_dump(exclude_none=True), # type: ignore - "created_by": user_api_key_dict.user_id or litellm_proxy_admin_name, - "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name, - } # type: ignore - ) - - return response - - -@router.post( - "/budget/update", - tags=["budget management"], - dependencies=[Depends(user_api_key_auth)], -) -async def update_budget( - budget_obj: BudgetNew, - user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), -): - """ - Create a new budget object. Can apply this to teams, orgs, end-users, keys. 
- """ - global prisma_client - - if prisma_client is None: - raise HTTPException( - status_code=500, - detail={"error": CommonProxyErrors.db_not_connected_error.value}, - ) - if budget_obj.budget_id is None: - raise HTTPException(status_code=400, detail={"error": "budget_id is required"}) - - response = await prisma_client.db.litellm_budgettable.update( - where={"budget_id": budget_obj.budget_id}, - data={ - **budget_obj.model_dump(exclude_none=True), # type: ignore - "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name, - }, # type: ignore - ) - - return response - - -@router.post( - "/budget/info", - tags=["budget management"], - dependencies=[Depends(user_api_key_auth)], -) -async def info_budget(data: BudgetRequest): - """ - Get the budget id specific information - """ - global prisma_client - - if prisma_client is None: - raise HTTPException(status_code=500, detail={"error": "No db connected"}) - - if len(data.budgets) == 0: - raise HTTPException( - status_code=400, - detail={ - "error": f"Specify list of budget id's to query. Passed in={data.budgets}" - }, - ) - response = await prisma_client.db.litellm_budgettable.find_many( - where={"budget_id": {"in": data.budgets}}, - ) - - return response - - -@router.get( - "/budget/settings", - tags=["budget management"], - dependencies=[Depends(user_api_key_auth)], -) -async def budget_settings( - budget_id: str, - user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), -): - """ - Get list of configurable params + current value for a budget item + description of each field - - Used on Admin UI. 
- """ - if prisma_client is None: - raise HTTPException( - status_code=400, - detail={"error": CommonProxyErrors.db_not_connected_error.value}, - ) - - if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN: - raise HTTPException( - status_code=400, - detail={ - "error": "{}, your role={}".format( - CommonProxyErrors.not_allowed_access.value, - user_api_key_dict.user_role, - ) - }, - ) - - ## get budget item from db - db_budget_row = await prisma_client.db.litellm_budgettable.find_first( - where={"budget_id": budget_id} - ) - - if db_budget_row is not None: - db_budget_row_dict = db_budget_row.model_dump(exclude_none=True) - else: - db_budget_row_dict = {} - - allowed_args = { - "max_parallel_requests": {"type": "Integer"}, - "tpm_limit": {"type": "Integer"}, - "rpm_limit": {"type": "Integer"}, - "budget_duration": {"type": "String"}, - "max_budget": {"type": "Float"}, - "soft_budget": {"type": "Float"}, - } - - return_val = [] - - for field_name, field_info in BudgetNew.model_fields.items(): - if field_name in allowed_args: - - _stored_in_db = True - - _response_obj = ConfigList( - field_name=field_name, - field_type=allowed_args[field_name]["type"], - field_description=field_info.description or "", - field_value=db_budget_row_dict.get(field_name, None), - stored_in_db=_stored_in_db, - field_default_value=field_info.default, - ) - return_val.append(_response_obj) - - return return_val - - -@router.get( - "/budget/list", - tags=["budget management"], - dependencies=[Depends(user_api_key_auth)], -) -async def list_budget( - user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), -): - """List all the created budgets in proxy db. 
Used on Admin UI.""" - if prisma_client is None: - raise HTTPException( - status_code=400, - detail={"error": CommonProxyErrors.db_not_connected_error.value}, - ) - - if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN: - raise HTTPException( - status_code=400, - detail={ - "error": "{}, your role={}".format( - CommonProxyErrors.not_allowed_access.value, - user_api_key_dict.user_role, - ) - }, - ) - - response = await prisma_client.db.litellm_budgettable.find_many() - - return response - - -@router.post( - "/budget/delete", - tags=["budget management"], - dependencies=[Depends(user_api_key_auth)], -) -async def delete_budget( - data: BudgetDeleteRequest, - user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), -): - """Delete budget""" - global prisma_client - - if prisma_client is None: - raise HTTPException( - status_code=500, - detail={"error": CommonProxyErrors.db_not_connected_error.value}, - ) - - if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN: - raise HTTPException( - status_code=400, - detail={ - "error": "{}, your role={}".format( - CommonProxyErrors.not_allowed_access.value, - user_api_key_dict.user_role, - ) - }, - ) - - response = await prisma_client.db.litellm_budgettable.delete( - where={"budget_id": data.id} - ) - - return response - - #### MODEL MANAGEMENT #### @@ -8856,3 +8627,4 @@ app.include_router(debugging_endpoints_router) app.include_router(ui_crud_endpoints_router) app.include_router(openai_files_router) app.include_router(team_callback_router) +app.include_router(budget_management_router) diff --git a/litellm/proxy/spend_tracking/spend_management_endpoints.py b/litellm/proxy/spend_tracking/spend_management_endpoints.py index 6af8593bd7..81968f9e0a 100644 --- a/litellm/proxy/spend_tracking/spend_management_endpoints.py +++ b/litellm/proxy/spend_tracking/spend_management_endpoints.py @@ -2533,8 +2533,8 @@ async def provider_budgets() -> ProviderBudgetResponse: _provider ) provider_budget_response_object = 
ProviderBudgetResponseObject( - budget_limit=_budget_info.budget_limit, - time_period=_budget_info.time_period, + budget_limit=_budget_info.max_budget, + time_period=_budget_info.budget_duration, spend=_provider_spend, budget_reset_at=_provider_budget_ttl, ) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index c93652f60c..896a04cd8d 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -1018,6 +1018,19 @@ def on_backoff(details): print_verbose(f"Backing off... this was attempt #{details['tries']}") +def jsonify_object(data: dict) -> dict: + db_data = copy.deepcopy(data) + + for k, v in db_data.items(): + if isinstance(v, dict): + try: + db_data[k] = json.dumps(v) + except Exception: + # This avoids Prisma retrying this 5 times, and making 5 clients + db_data[k] = "failed-to-serialize-json" + return db_data + + class PrismaClient: user_list_transactons: dict = {} end_user_list_transactons: dict = {} @@ -1503,25 +1516,31 @@ class PrismaClient: ) sql_query = f""" - SELECT - v.*, - t.spend AS team_spend, - t.max_budget AS team_max_budget, - t.tpm_limit AS team_tpm_limit, - t.rpm_limit AS team_rpm_limit, - t.models AS team_models, - t.metadata AS team_metadata, - t.blocked AS team_blocked, - t.team_alias AS team_alias, - t.metadata AS team_metadata, - t.members_with_roles AS team_members_with_roles, - tm.spend AS team_member_spend, - m.aliases as team_model_aliases - FROM "LiteLLM_VerificationToken" AS v - LEFT JOIN "LiteLLM_TeamTable" AS t ON v.team_id = t.team_id - LEFT JOIN "LiteLLM_TeamMembership" AS tm ON v.team_id = tm.team_id AND tm.user_id = v.user_id - LEFT JOIN "LiteLLM_ModelTable" m ON t.model_id = m.id - WHERE v.token = '{token}' + SELECT + v.*, + t.spend AS team_spend, + t.max_budget AS team_max_budget, + t.tpm_limit AS team_tpm_limit, + t.rpm_limit AS team_rpm_limit, + t.models AS team_models, + t.metadata AS team_metadata, + t.blocked AS team_blocked, + t.team_alias AS team_alias, + t.metadata AS team_metadata, + 
t.members_with_roles AS team_members_with_roles, + tm.spend AS team_member_spend, + m.aliases AS team_model_aliases, + -- Added comma to separate b.* columns + b.max_budget AS litellm_budget_table_max_budget, + b.tpm_limit AS litellm_budget_table_tpm_limit, + b.rpm_limit AS litellm_budget_table_rpm_limit, + b.model_max_budget as litellm_budget_table_model_max_budget + FROM "LiteLLM_VerificationToken" AS v + LEFT JOIN "LiteLLM_TeamTable" AS t ON v.team_id = t.team_id + LEFT JOIN "LiteLLM_TeamMembership" AS tm ON v.team_id = tm.team_id AND tm.user_id = v.user_id + LEFT JOIN "LiteLLM_ModelTable" m ON t.model_id = m.id + LEFT JOIN "LiteLLM_BudgetTable" AS b ON v.budget_id = b.budget_id + WHERE v.token = '{token}' """ print_verbose("sql_query being made={}".format(sql_query)) @@ -1634,6 +1653,7 @@ class PrismaClient: "create": {**db_data}, # type: ignore "update": {}, # don't do anything if it already exists }, + include={"litellm_budget_table": True}, ) verbose_proxy_logger.info("Data Inserted into Keys Table") return new_verification_token diff --git a/litellm/router.py b/litellm/router.py index 3cd1ef4c2f..7aa2528504 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -98,7 +98,6 @@ from litellm.types.router import ( CustomRoutingStrategyBase, Deployment, DeploymentTypedDict, - GenericBudgetConfigType, LiteLLM_Params, ModelGroupInfo, OptionalPreCallChecks, @@ -111,6 +110,7 @@ from litellm.types.router import ( RoutingStrategy, ) from litellm.types.services import ServiceTypes +from litellm.types.utils import GenericBudgetConfigType from litellm.types.utils import ModelInfo as ModelMapInfo from litellm.types.utils import StandardLoggingPayload from litellm.utils import ( diff --git a/litellm/router_strategy/budget_limiter.py b/litellm/router_strategy/budget_limiter.py index 2d20e19a8e..920f6c0881 100644 --- a/litellm/router_strategy/budget_limiter.py +++ b/litellm/router_strategy/budget_limiter.py @@ -33,14 +33,10 @@ from litellm.router_utils.cooldown_callbacks 
import ( _get_prometheus_logger_from_callbacks, ) from litellm.types.llms.openai import AllMessageValues -from litellm.types.router import ( - DeploymentTypedDict, - GenericBudgetConfigType, - GenericBudgetInfo, - LiteLLM_Params, - RouterErrors, -) -from litellm.types.utils import BudgetConfig, StandardLoggingPayload +from litellm.types.router import DeploymentTypedDict, LiteLLM_Params, RouterErrors +from litellm.types.utils import BudgetConfig +from litellm.types.utils import BudgetConfig as GenericBudgetInfo +from litellm.types.utils import GenericBudgetConfigType, StandardLoggingPayload DEFAULT_REDIS_SYNC_INTERVAL = 1 @@ -170,17 +166,19 @@ class RouterBudgetLimiting(CustomLogger): provider = self._get_llm_provider_for_deployment(deployment) if provider in provider_configs: config = provider_configs[provider] + if config.max_budget is None: + continue current_spend = spend_map.get( - f"provider_spend:{provider}:{config.time_period}", 0.0 + f"provider_spend:{provider}:{config.budget_duration}", 0.0 ) self._track_provider_remaining_budget_prometheus( provider=provider, spend=current_spend, - budget_limit=config.budget_limit, + budget_limit=config.max_budget, ) - if current_spend >= config.budget_limit: - debug_msg = f"Exceeded budget for provider {provider}: {current_spend} >= {config.budget_limit}" + if config.max_budget and current_spend >= config.max_budget: + debug_msg = f"Exceeded budget for provider {provider}: {current_spend} >= {config.max_budget}" deployment_above_budget_info += f"{debug_msg}\n" is_within_budget = False continue @@ -194,30 +192,32 @@ class RouterBudgetLimiting(CustomLogger): if model_id in deployment_configs: config = deployment_configs[model_id] current_spend = spend_map.get( - f"deployment_spend:{model_id}:{config.time_period}", 0.0 + f"deployment_spend:{model_id}:{config.budget_duration}", 0.0 ) - if current_spend >= config.budget_limit: - debug_msg = f"Exceeded budget for deployment model_name: {_model_name}, litellm_params.model: 
{_litellm_model_name}, model_id: {model_id}: {current_spend} >= {config.budget_limit}" + if config.max_budget and current_spend >= config.max_budget: + debug_msg = f"Exceeded budget for deployment model_name: {_model_name}, litellm_params.model: {_litellm_model_name}, model_id: {model_id}: {current_spend} >= {config.budget_duration}" verbose_router_logger.debug(debug_msg) deployment_above_budget_info += f"{debug_msg}\n" is_within_budget = False continue - # Check tag budget if self.tag_budget_config and is_within_budget: for _tag in request_tags: _tag_budget_config = self._get_budget_config_for_tag(_tag) if _tag_budget_config: _tag_spend = spend_map.get( - f"tag_spend:{_tag}:{_tag_budget_config.time_period}", 0.0 + f"tag_spend:{_tag}:{_tag_budget_config.budget_duration}", + 0.0, ) - if _tag_spend >= _tag_budget_config.budget_limit: - debug_msg = f"Exceeded budget for tag='{_tag}', tag_spend={_tag_spend}, tag_budget_limit={_tag_budget_config.budget_limit}" + if ( + _tag_budget_config.max_budget + and _tag_spend >= _tag_budget_config.max_budget + ): + debug_msg = f"Exceeded budget for tag='{_tag}', tag_spend={_tag_spend}, tag_budget_limit={_tag_budget_config.max_budget}" verbose_router_logger.debug(debug_msg) deployment_above_budget_info += f"{debug_msg}\n" is_within_budget = False continue - if is_within_budget: potential_deployments.append(deployment) @@ -247,10 +247,13 @@ class RouterBudgetLimiting(CustomLogger): provider = self._get_llm_provider_for_deployment(deployment) if provider is not None: budget_config = self._get_budget_config_for_provider(provider) - if budget_config is not None: + if ( + budget_config is not None + and budget_config.budget_duration is not None + ): provider_configs[provider] = budget_config cache_keys.append( - f"provider_spend:{provider}:{budget_config.time_period}" + f"provider_spend:{provider}:{budget_config.budget_duration}" ) # Check deployment budgets @@ -261,7 +264,7 @@ class RouterBudgetLimiting(CustomLogger): if budget_config 
is not None: deployment_configs[model_id] = budget_config cache_keys.append( - f"deployment_spend:{model_id}:{budget_config.time_period}" + f"deployment_spend:{model_id}:{budget_config.budget_duration}" ) # Check tag budgets if self.tag_budget_config: @@ -272,7 +275,7 @@ class RouterBudgetLimiting(CustomLogger): _tag_budget_config = self._get_budget_config_for_tag(_tag) if _tag_budget_config: cache_keys.append( - f"tag_spend:{_tag}:{_tag_budget_config.time_period}" + f"tag_spend:{_tag}:{_tag_budget_config.budget_duration}" ) return cache_keys, provider_configs, deployment_configs @@ -365,7 +368,7 @@ class RouterBudgetLimiting(CustomLogger): if budget_config: # increment spend for provider spend_key = ( - f"provider_spend:{custom_llm_provider}:{budget_config.time_period}" + f"provider_spend:{custom_llm_provider}:{budget_config.budget_duration}" ) start_time_key = f"provider_budget_start_time:{custom_llm_provider}" await self._increment_spend_for_key( @@ -378,9 +381,7 @@ class RouterBudgetLimiting(CustomLogger): deployment_budget_config = self._get_budget_config_for_deployment(model_id) if deployment_budget_config: # increment spend for specific deployment id - deployment_spend_key = ( - f"deployment_spend:{model_id}:{deployment_budget_config.time_period}" - ) + deployment_spend_key = f"deployment_spend:{model_id}:{deployment_budget_config.budget_duration}" deployment_start_time_key = f"deployment_budget_start_time:{model_id}" await self._increment_spend_for_key( budget_config=deployment_budget_config, @@ -395,7 +396,7 @@ class RouterBudgetLimiting(CustomLogger): _tag_budget_config = self._get_budget_config_for_tag(_tag) if _tag_budget_config: _tag_spend_key = ( - f"tag_spend:{_tag}:{_tag_budget_config.time_period}" + f"tag_spend:{_tag}:{_tag_budget_config.budget_duration}" ) _tag_start_time_key = f"tag_budget_start_time:{_tag}" await self._increment_spend_for_key( @@ -412,8 +413,11 @@ class RouterBudgetLimiting(CustomLogger): start_time_key: str, response_cost: 
float, ): + if budget_config.budget_duration is None: + return + current_time = datetime.now(timezone.utc).timestamp() - ttl_seconds = duration_in_seconds(budget_config.time_period) + ttl_seconds = duration_in_seconds(budget_config.budget_duration) budget_start = await self._get_or_set_budget_start_time( start_time_key=start_time_key, @@ -529,21 +533,23 @@ class RouterBudgetLimiting(CustomLogger): for provider, config in self.provider_budget_config.items(): if config is None: continue - cache_keys.append(f"provider_spend:{provider}:{config.time_period}") + cache_keys.append( + f"provider_spend:{provider}:{config.budget_duration}" + ) if self.deployment_budget_config is not None: for model_id, config in self.deployment_budget_config.items(): if config is None: continue cache_keys.append( - f"deployment_spend:{model_id}:{config.time_period}" + f"deployment_spend:{model_id}:{config.budget_duration}" ) if self.tag_budget_config is not None: for tag, config in self.tag_budget_config.items(): if config is None: continue - cache_keys.append(f"tag_spend:{tag}:{config.time_period}") + cache_keys.append(f"tag_spend:{tag}:{config.budget_duration}") # Batch fetch current spend values from Redis redis_values = await self.dual_cache.redis_cache.async_batch_get_cache( @@ -635,7 +641,7 @@ class RouterBudgetLimiting(CustomLogger): if budget_config is None: return None - spend_key = f"provider_spend:{provider}:{budget_config.time_period}" + spend_key = f"provider_spend:{provider}:{budget_config.budget_duration}" if self.dual_cache.redis_cache: # use Redis as source of truth since that has spend across all instances @@ -652,7 +658,7 @@ class RouterBudgetLimiting(CustomLogger): if budget_config is None: return None - spend_key = f"provider_spend:{provider}:{budget_config.time_period}" + spend_key = f"provider_spend:{provider}:{budget_config.budget_duration}" if self.dual_cache.redis_cache: ttl_seconds = await self.dual_cache.redis_cache.async_get_ttl(spend_key) else: @@ -672,9 +678,13 
@@ class RouterBudgetLimiting(CustomLogger): - provider_budget_start_time:{provider} - stores the start time of the budget window """ - spend_key = f"provider_spend:{provider}:{budget_config.time_period}" + + spend_key = f"provider_spend:{provider}:{budget_config.budget_duration}" start_time_key = f"provider_budget_start_time:{provider}" - ttl_seconds = duration_in_seconds(budget_config.time_period) + ttl_seconds: Optional[int] = None + if budget_config.budget_duration is not None: + ttl_seconds = duration_in_seconds(budget_config.budget_duration) + budget_start = await self.dual_cache.async_get_cache(start_time_key) if budget_start is None: budget_start = datetime.now(timezone.utc).timestamp() diff --git a/litellm/types/router.py b/litellm/types/router.py index e5d6511359..f1a1f44480 100644 --- a/litellm/types/router.py +++ b/litellm/types/router.py @@ -11,8 +11,6 @@ import httpx from pydantic import BaseModel, ConfigDict, Field from typing_extensions import Required, TypedDict -from litellm.types.utils import GenericBudgetConfigType, GenericBudgetInfo - from ..exceptions import RateLimitError from .completion import CompletionRequest from .embedding import EmbeddingRequest diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 934b56e486..957ce3ff5b 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -1694,17 +1694,25 @@ class StandardKeyGenerationConfig(TypedDict, total=False): personal_key_generation: PersonalUIKeyGenerationConfig -class GenericBudgetInfo(BaseModel): - time_period: str # e.g., '1d', '30d' - budget_limit: float - - -GenericBudgetConfigType = Dict[str, GenericBudgetInfo] - - class BudgetConfig(BaseModel): - max_budget: float - budget_duration: str + max_budget: Optional[float] = None + budget_duration: Optional[str] = None + tpm_limit: Optional[int] = None + rpm_limit: Optional[int] = None + + def __init__(self, **data: Any) -> None: + # Map time_period to budget_duration if present + if "time_period" in data: + 
data["budget_duration"] = data.pop("time_period") + + # Map budget_limit to max_budget if present + if "budget_limit" in data: + data["max_budget"] = data.pop("budget_limit") + + super().__init__(**data) + + +GenericBudgetConfigType = Dict[str, BudgetConfig] class LlmProviders(str, Enum): diff --git a/tests/documentation_tests/test_api_docs.py b/tests/documentation_tests/test_api_docs.py index edab7c1d34..2faac371c3 100644 --- a/tests/documentation_tests/test_api_docs.py +++ b/tests/documentation_tests/test_api_docs.py @@ -172,6 +172,11 @@ def main(): "delete_organization", "list_organization", "user_update", + "new_budget", + "info_budget", + "update_budget", + "delete_budget", + "list_budget", ] # directory = "../../litellm/proxy/management_endpoints" # LOCAL directory = "./litellm/proxy/management_endpoints" diff --git a/tests/local_testing/test_router_budget_limiter.py b/tests/local_testing/test_router_budget_limiter.py index 8ca1f4e767..8d4948f8f9 100644 --- a/tests/local_testing/test_router_budget_limiter.py +++ b/tests/local_testing/test_router_budget_limiter.py @@ -14,15 +14,13 @@ from litellm import Router from litellm.router_strategy.budget_limiter import RouterBudgetLimiting from litellm.types.router import ( RoutingStrategy, - GenericBudgetConfigType, - GenericBudgetInfo, ) +from litellm.types.utils import GenericBudgetConfigType, BudgetConfig from litellm.caching.caching import DualCache, RedisCache import logging from litellm._logging import verbose_router_logger import litellm from datetime import timezone, timedelta -from litellm.types.utils import BudgetConfig verbose_router_logger.setLevel(logging.DEBUG) @@ -67,8 +65,8 @@ async def test_provider_budgets_e2e_test(): cleanup_redis() # Modify for test provider_budget_config: GenericBudgetConfigType = { - "openai": GenericBudgetInfo(time_period="1d", budget_limit=0.000000000001), - "azure": GenericBudgetInfo(time_period="1d", budget_limit=100), + "openai": BudgetConfig(time_period="1d", 
budget_limit=0.000000000001), + "azure": BudgetConfig(time_period="1d", budget_limit=100), } router = Router( @@ -215,8 +213,8 @@ async def test_get_budget_config_for_provider(): """ cleanup_redis() config = { - "openai": GenericBudgetInfo(time_period="1d", budget_limit=100), - "anthropic": GenericBudgetInfo(time_period="7d", budget_limit=500), + "openai": BudgetConfig(budget_duration="1d", max_budget=100), + "anthropic": BudgetConfig(budget_duration="7d", max_budget=500), } provider_budget = RouterBudgetLimiting( @@ -226,13 +224,13 @@ async def test_get_budget_config_for_provider(): # Test existing providers openai_config = provider_budget._get_budget_config_for_provider("openai") assert openai_config is not None - assert openai_config.time_period == "1d" - assert openai_config.budget_limit == 100 + assert openai_config.budget_duration == "1d" + assert openai_config.max_budget == 100 anthropic_config = provider_budget._get_budget_config_for_provider("anthropic") assert anthropic_config is not None - assert anthropic_config.time_period == "7d" - assert anthropic_config.budget_limit == 500 + assert anthropic_config.budget_duration == "7d" + assert anthropic_config.max_budget == 500 # Test non-existent provider assert provider_budget._get_budget_config_for_provider("unknown") is None @@ -254,15 +252,15 @@ async def test_prometheus_metric_tracking(): provider_budget = RouterBudgetLimiting( dual_cache=DualCache(), provider_budget_config={ - "openai": GenericBudgetInfo(time_period="1d", budget_limit=100) + "openai": BudgetConfig(budget_duration="1d", max_budget=100) }, ) litellm._async_success_callback = [mock_prometheus] provider_budget_config: GenericBudgetConfigType = { - "openai": GenericBudgetInfo(time_period="1d", budget_limit=0.000000000001), - "azure": GenericBudgetInfo(time_period="1d", budget_limit=100), + "openai": BudgetConfig(budget_duration="1d", max_budget=0.000000000001), + "azure": BudgetConfig(budget_duration="1d", max_budget=100), } router = Router( 
@@ -442,8 +440,8 @@ async def test_sync_in_memory_spend_with_redis(): """ cleanup_redis() provider_budget_config = { - "openai": GenericBudgetInfo(time_period="1d", budget_limit=100), - "anthropic": GenericBudgetInfo(time_period="1d", budget_limit=200), + "openai": BudgetConfig(time_period="1d", budget_limit=100), + "anthropic": BudgetConfig(time_period="1d", budget_limit=200), } provider_budget = RouterBudgetLimiting( @@ -497,7 +495,7 @@ async def test_get_current_provider_spend(): provider_budget = RouterBudgetLimiting( dual_cache=DualCache(), provider_budget_config={ - "openai": GenericBudgetInfo(time_period="1d", budget_limit=100), + "openai": BudgetConfig(time_period="1d", budget_limit=100), }, ) @@ -538,8 +536,8 @@ async def test_get_current_provider_budget_reset_at(): ) ), provider_budget_config={ - "openai": GenericBudgetInfo(time_period="1d", budget_limit=100), - "vertex_ai": GenericBudgetInfo(time_period="1h", budget_limit=100), + "openai": BudgetConfig(budget_duration="1d", max_budget=100), + "vertex_ai": BudgetConfig(budget_duration="1h", max_budget=100), }, ) diff --git a/tests/proxy_admin_ui_tests/test_key_management.py b/tests/proxy_admin_ui_tests/test_key_management.py index 9104d7b9a7..2314d67c61 100644 --- a/tests/proxy_admin_ui_tests/test_key_management.py +++ b/tests/proxy_admin_ui_tests/test_key_management.py @@ -777,3 +777,68 @@ async def test_user_info_as_proxy_admin(prisma_client): assert user_info_response.keys is not None assert len(user_info_response.keys) > 0, "Expected at least one key in response" + + +@pytest.mark.asyncio +async def test_key_update_with_model_specific_params(prisma_client): + setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + await litellm.proxy.proxy_server.prisma_client.connect() + + from litellm.proxy.management_endpoints.key_management_endpoints import ( + update_key_fn, + ) + from litellm.proxy._types import UpdateKeyRequest + + 
new_key = await generate_key_fn( + data=GenerateKeyRequest(models=["gpt-4"]), + user_api_key_dict=UserAPIKeyAuth( + user_role=LitellmUserRoles.PROXY_ADMIN, + api_key="sk-1234", + user_id="1234", + ), + ) + + generated_key = new_key.key + token_hash = new_key.token_id + print(generated_key) + + request = Request(scope={"type": "http"}) + request._url = URL(url="/update/key") + + args = { + "key_alias": f"test-key_{uuid.uuid4()}", + "duration": None, + "models": ["all-team-models"], + "spend": 0, + "max_budget": None, + "user_id": "default_user_id", + "team_id": None, + "max_parallel_requests": None, + "metadata": { + "model_tpm_limit": {"fake-openai-endpoint": 10}, + "model_rpm_limit": {"fake-openai-endpoint": 0}, + }, + "tpm_limit": None, + "rpm_limit": None, + "budget_duration": None, + "allowed_cache_controls": [], + "soft_budget": None, + "config": {}, + "permissions": {}, + "model_max_budget": {}, + "send_invite_email": None, + "model_rpm_limit": None, + "model_tpm_limit": None, + "guardrails": None, + "blocked": None, + "aliases": {}, + "key": token_hash, + "budget_id": None, + "key_name": "sk-...2GWA", + "expires": None, + "token_id": token_hash, + "litellm_budget_table": None, + "token": token_hash, + } + await update_key_fn(request=request, data=UpdateKeyRequest(**args)) diff --git a/tests/proxy_unit_tests/test_proxy_utils.py b/tests/proxy_unit_tests/test_proxy_utils.py index 5413bfce73..3b3da7b982 100644 --- a/tests/proxy_unit_tests/test_proxy_utils.py +++ b/tests/proxy_unit_tests/test_proxy_utils.py @@ -1,6 +1,7 @@ import asyncio import os import sys +from typing import Any, Dict from unittest.mock import Mock from litellm.proxy.utils import _get_redoc_url, _get_docs_url import json @@ -1104,3 +1105,89 @@ def test_proxy_config_state_post_init_callback_call(): config = pc.get_config_state() assert config["litellm_settings"]["default_team_settings"][0]["team_id"] == "test" + + +@pytest.mark.parametrize( + "associated_budget_table, 
expected_user_api_key_auth_key, expected_user_api_key_auth_value", + [ + ( + { + "litellm_budget_table_max_budget": None, + "litellm_budget_table_tpm_limit": None, + "litellm_budget_table_rpm_limit": 1, + "litellm_budget_table_model_max_budget": None, + }, + "rpm_limit", + 1, + ), + ( + {}, + None, + None, + ), + ( + { + "litellm_budget_table_max_budget": None, + "litellm_budget_table_tpm_limit": None, + "litellm_budget_table_rpm_limit": None, + "litellm_budget_table_model_max_budget": {"gpt-4o": 100}, + }, + "model_max_budget", + {"gpt-4o": 100}, + ), + ], +) +def test_litellm_verification_token_view_response_with_budget_table( + associated_budget_table, + expected_user_api_key_auth_key, + expected_user_api_key_auth_value, +): + from litellm.proxy._types import LiteLLM_VerificationTokenView + + args: Dict[str, Any] = { + "token": "78b627d4d14bc3acf5571ae9cb6834e661bc8794d1209318677387add7621ce1", + "key_name": "sk-...if_g", + "key_alias": None, + "soft_budget_cooldown": False, + "spend": 0.011441999999999997, + "expires": None, + "models": [], + "aliases": {}, + "config": {}, + "user_id": None, + "team_id": "test", + "permissions": {}, + "max_parallel_requests": None, + "metadata": {}, + "blocked": None, + "tpm_limit": None, + "rpm_limit": None, + "max_budget": None, + "budget_duration": None, + "budget_reset_at": None, + "allowed_cache_controls": [], + "model_spend": {}, + "model_max_budget": {}, + "budget_id": "my-test-tier", + "created_at": "2024-12-26T02:28:52.615+00:00", + "updated_at": "2024-12-26T03:01:51.159+00:00", + "team_spend": 0.012134999999999998, + "team_max_budget": None, + "team_tpm_limit": None, + "team_rpm_limit": None, + "team_models": [], + "team_metadata": {}, + "team_blocked": False, + "team_alias": None, + "team_members_with_roles": [{"role": "admin", "user_id": "default_user_id"}], + "team_member_spend": None, + "team_model_aliases": None, + "team_member": None, + **associated_budget_table, + } + resp = 
LiteLLM_VerificationTokenView(**args) + if expected_user_api_key_auth_key is not None: + assert ( + getattr(resp, expected_user_api_key_auth_key) + == expected_user_api_key_auth_value + ) diff --git a/tests/proxy_unit_tests/test_unit_test_max_model_budget_limiter.py b/tests/proxy_unit_tests/test_unit_test_max_model_budget_limiter.py index 82d85cf2f7..fc8373a174 100644 --- a/tests/proxy_unit_tests/test_unit_test_max_model_budget_limiter.py +++ b/tests/proxy_unit_tests/test_unit_test_max_model_budget_limiter.py @@ -13,7 +13,7 @@ import pytest import litellm import json -from litellm.types.utils import GenericBudgetInfo +from litellm.types.utils import BudgetConfig as GenericBudgetInfo import os import sys from datetime import datetime @@ -56,13 +56,13 @@ def test_get_request_model_budget_config(budget_limiter): config = budget_limiter._get_request_model_budget_config( model="gpt-4", internal_model_max_budget=internal_budget ) - assert config.budget_limit == 100.0 + assert config.max_budget == 100.0 # Test model with provider config = budget_limiter._get_request_model_budget_config( model="openai/gpt-4", internal_model_max_budget=internal_budget ) - assert config.budget_limit == 100.0 + assert config.max_budget == 100.0 # Test non-existent model config = budget_limiter._get_request_model_budget_config(