{ "personal_info": { "name": "Umang Bhalla", "location": "smwhere in india", "description": "platform/mlops/devops/llmops/llm-agents engineer, shitpost demigod", "born": "March 2001", "other_links": { "github": "https://github.com/umgbhalla", "x": "https://x.com/umgbhalla", "discord": "umgbhalla", "linkedin": "https://linkedin.com/in/umgbhalla", "resume": "dm on x.com/umgbhalla" } }, "timezone": "GMT+5:30", "work_experience": [ { "year": "2024-present", "company": "NewEngen.com", "role": "DevOps Engineer", "location": "remote", "hq": "us", "work": [ "migration from gcp cloud run to kubernetes, and cost optimization", "istio service mesh integration for micro service architecture and custom auth with firebase integration across all endpoints (k8s native apigateway and ingress design)", "CI/CD pipeline for 50+ services", "LLM Agent design and implementation for marketing analytics, report generation, trend analysis and more (LIFT AI)", "org campaign/ads/adset taxonomy collection tool, system and product design", "transformer based recommendation system for taxonomy tool, training and model refresh pipeline, eval systems" ], "tech": [ "kubernetes", "gcp", "istio", "firebase", "python", "bash", "prometheus", "grafana", "agno", "pytorch", "qdrant" ] }, { "year": "2023-2024", "company": "Reint.ai", "role": "MLOps Consultant", "location": "remote", "hq": "au", "work": [ "mlops monitoring system, version control and metric logging for internal model train runs", "high frequency monitoring and alerting system for vms, and prod apis", "custom training infra to utilise multiple gpus, spot instances, and resume training from last checkpoint for long runs", "ablation studies on transformer models for time series forecasting", "logging and stats infra with elastic, logstash and kibana" ], "tech": [ "docker", "gcp", "python", "bash", "elastic", "logstash", "kibana", "pytorch", "autotrain", "clearml" ] }, { "year": "2022-2023", "company": "Bytelearn.com", "role": "DevOps Consultant", "location": "remote", "hq": "india", "work": [ "cost optimization on aws, over 77% savings on annual bill", "migration to kubernetes", "migration to gcp from aws", "opentelemetry integration for tracing and monitoring", "high availability monitoring and observability setup for opentelemetry with promscale, tsdb, grafana, prometheus", "multi node mysql and postgres deployment with perconadb", "CI/CD and uptime ownership for 30+ services", "automated testing env setup" ], "tech": [ "kubernetes", "argocd", "docker", "gcp", "python", "bash", "prometheus", "grafana", "mysql", "postgres", "perconadb", "redis", "tsdb" ] } ], "summary": { "infrastructure": [ "kubernetes", "podman", "docker", "gke", "eks", "argocd", "terraform", "helm", "gcp", "aws", "azure", "istio", "prometheus", "grafana", "github actions" ], "databases": [ "perconadb", "postgresql", "mysql", "redis (cluster)", "mongodb", "elasticsearch", "iceberg", "hudi" ], "ml_ops": [ "clearml", "mlflow", "weights & biases", "autotrain", "pytorch", "tensorflow", "qdrant", "custom llm agents" ], "programming_languages": [ "python", "bash", "rust", "go", "javascript", "typescript" ], "monitoring_logging": [ "prometheus", "grafana", "elastic stack (elk)", "opentelemetry", "logstash", "kibana" ], "ci_cd": [ "github actions", "argocd", "gitlab ci", "circleci" ], "cloud": [ "gcp", "aws", "azure" ], "finops": [ "cost optimization", "billing analysis", "resource tagging" ], "other": [ "custom dev tools", "system design", "automation scripting" ] }, "format_source": "curl https://umgbhalla.xyz | jless" }