Working with GeoJSON in Python
Use GeoJSON in Python with json, Shapely, GeoPandas, and Fiona. Read, write, transform, and serve spatial data.
Reading GeoJSON with stdlib json
For files under ~50 MB where you need no geometry operations — just reading properties or iterating features — the standard library is sufficient. No dependencies, no installation, ships with every Python 3.x interpreter.
python
import json

with open("features.geojson") as f:
    data = json.load(f)

for feature in data["features"]:
    props = feature["properties"]
    coords = feature["geometry"]["coordinates"]
    print(props.get("name"), coords)

This pattern works until you need to query by location, compute areas, or reproject. The moment you write if coords[0] > x and coords[1] > y to filter by bounding box, stop and use GeoPandas instead — you are reimplementing spatial indexing by hand.
Shapely for Geometry Operations
Shapely (2.0+, built on GEOS 3.11+) is the core geometry library for Python. It handles creation, manipulation, and measurement of geometries without any I/O or CRS awareness. Install with pip install shapely.
Creating Geometries
python
from shapely.geometry import Point, LineString, Polygon, shape

# From explicit coordinates
point = Point(-73.9857, 40.7484)
line = LineString([(-73.9857, 40.7484), (-74.0060, 40.7128)])
poly = Polygon([(-74.01, 40.71), (-73.97, 40.71), (-73.97, 40.75), (-74.01, 40.75)])

# From a GeoJSON geometry dict (use shape() for interoperability)
geojson_geom = {"type": "Point", "coordinates": [-73.9857, 40.7484]}
point_from_geojson = shape(geojson_geom)

Buffer
python
# Buffer in degrees (EPSG:4326) — not meaningful for metric measurements
buffered = point.buffer(0.01)  # ~1 km at mid-latitudes, imprecise
# For accurate metric buffers, project first (see pyproj section below),
# then buffer in meters, then reproject back

Union and Intersection
python
from shapely.geometry import Polygon
from shapely.ops import unary_union

poly_a = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])
poly_b = Polygon([(1, 1), (3, 1), (3, 3), (1, 3)])

intersection = poly_a.intersection(poly_b)  # Polygon covering (1,1)-(2,2)
union = poly_a.union(poly_b)                # L-shaped polygon
difference = poly_a.difference(poly_b)      # poly_a minus the overlap

# Merge a list of polygons into one
polygons = [poly_a, poly_b, Polygon([(4, 0), (6, 0), (6, 2), (4, 2)])]
merged = unary_union(polygons)

Area, Length, and Bounds
python
print(poly_a.area)    # 4.0 (in coordinate units — degrees if EPSG:4326)
print(line.length)    # Euclidean length in coordinate units
print(poly_a.bounds)  # (minx, miny, maxx, maxy) tuple
print(point.distance(Point(1, 1)))  # Euclidean distance

# Shapely 2.0 vectorized operations on arrays (much faster than looping)
import numpy as np
from shapely import area as shp_area, buffer as shp_buffer

points = [Point(i, i) for i in range(1000)]
areas = np.array([p.buffer(1).area for p in points])  # old style: one Python call per geometry
# New style — vectorized functions operate on geometry arrays without Python loops:
areas_fast = shp_area(shp_buffer(points, 1))

GeoPandas for Tabular Spatial Data
GeoPandas wraps Shapely geometries in a Pandas DataFrame column, giving you SQL-like operations (filter, join, groupby) with spatial awareness. Install with pip install geopandas. GeoPandas 0.14+ uses Shapely 2.0 internally, which is 10–100x faster than the GEOS-via-ctypes path in older versions.
Reading and Writing GeoJSON
python
import geopandas as gpd

# Read GeoJSON (also reads Shapefile, GeoPackage, FlatGeobuf — same API)
gdf = gpd.read_file("cities.geojson")
print(gdf.crs)     # EPSG:4326 for RFC 7946 GeoJSON
print(gdf.shape)   # (n_features, n_columns)
print(gdf.dtypes)  # geometry column is 'geometry' dtype
print(gdf.head())

# Write back to GeoJSON
gdf.to_file("output.geojson", driver="GeoJSON")

# Write to GeoPackage (preferred for large datasets)
gdf.to_file("output.gpkg", driver="GPKG", layer="cities")

Filtering by Property
python
# Standard Pandas boolean indexing works on all non-geometry columns
large_cities = gdf[gdf["population"] > 1_000_000]
us_cities = gdf[gdf["country"] == "US"]

# Chain conditions
major_us = gdf[(gdf["country"] == "US") & (gdf["population"] > 500_000)]

# Query syntax
west_coast = gdf.query("state in ['CA', 'OR', 'WA']")

# Reset index after filtering to avoid index gaps
major_us = major_us.reset_index(drop=True)

Spatial Joins
python
import geopandas as gpd

cities = gpd.read_file("cities.geojson")    # Points
regions = gpd.read_file("regions.geojson")  # Polygons

# Join cities to the region they fall within — adds region columns to cities
cities_with_region = cities.sjoin(regions, how="left", predicate="within")
# predicate options: "within", "intersects", "contains", "crosses", "touches"
# how options: "left", "right", "inner"

# Count cities per region
city_counts = cities_with_region.groupby("region_name").size().reset_index(name="city_count")

Dissolve
python
# Merge all features sharing the same attribute value into one geometry
# Equivalent to SQL: SELECT country, ST_Union(geom), SUM(population) FROM cities GROUP BY country
dissolved = gdf.dissolve(by="country", aggfunc={"population": "sum", "name": "count"})

# Dissolve everything into a single feature (e.g., merge all districts into one boundary)
single = gdf.dissolve()  # no by= argument

Fiona for Low-Level I/O
Fiona is the I/O layer that GeoPandas uses internally, exposed directly for cases where you need streaming reads, schema inspection, or format control without loading everything into memory. Install with pip install fiona. Fiona 1.9+ uses GDAL 3.x and supports FlatGeobuf, GeoPackage, GeoJSON, Shapefile, and 80+ other drivers.
Schema Inspection
python
import fiona

with fiona.open("data.geojson") as src:
    print(src.driver)  # "GeoJSON"
    print(src.crs)     # CRS object in Fiona 1.9+; older versions returned {"init": "epsg:4326"}
    print(src.schema)  # {"geometry": "Point", "properties": {"name": "str", "pop": "int"}}
    print(len(src))    # feature count (may require full scan for some formats)
    print(src.bounds)  # (minx, miny, maxx, maxy) of the entire layer

Streaming Read (Memory-Efficient)
python
import fiona

# Process features one at a time — constant memory regardless of file size.
# Wrapped in a generator function because yield is only valid inside a function.
def big_cities():
    with fiona.open("large_dataset.fgb") as src:
        for feature in src:
            geom_type = feature["geometry"]["type"]
            props = feature["properties"]
            # Process without ever holding more than one feature in memory
            if props["population"] > 1_000_000:
                yield feature  # or write to output, etc.

Filtered Read by Bounding Box
python
import fiona

bbox = (-74.1, 40.6, -73.7, 40.9)  # (minx, miny, maxx, maxy) for NYC area

with fiona.open("countries.fgb") as src:
    # For FlatGeobuf and GeoPackage, this uses the spatial index — fast
    # For GeoJSON and Shapefile, it falls back to a full scan — slow
    for feature in src.filter(bbox=bbox):
        print(feature["properties"]["name"])

Writing with Format Control
python
import fiona

schema = {
    "geometry": "Point",
    "properties": {"name": "str", "elevation_m": "float"}
}

# Pass the CRS as an "EPSG:xxxx" string — fiona.crs.from_epsg() is deprecated in Fiona 1.9+
with fiona.open("output.gpkg", "w", driver="GPKG", schema=schema, crs="EPSG:4326") as dst:
    dst.write({
        "geometry": {"type": "Point", "coordinates": [-73.9857, 40.7484]},
        "properties": {"name": "Empire State Building", "elevation_m": 443.2}
    })

Coordinate Transformations with pyproj
pyproj wraps the PROJ library and handles all CRS transformations. Install with pip install pyproj. The Transformer class in pyproj 3.x is the correct API — the older Proj and transform() function API is deprecated.
EPSG:4326 to EPSG:3857 (Web Mercator)
python
from pyproj import Transformer

# always_xy=True forces (longitude, latitude) input order regardless of CRS axis order
transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)

# Single point: (longitude, latitude) -> (easting, northing) in meters
easting, northing = transformer.transform(-73.9857, 40.7484)
print(f"{easting:.2f}, {northing:.2f}")  # ≈ -8236050.45, 4975304

# Inverse: EPSG:3857 back to EPSG:4326
inv_transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)
lon, lat = inv_transformer.transform(easting, northing)
print(f"{lon:.6f}, {lat:.6f}")  # -73.985700, 40.748400

Reprojecting an Entire GeoDataFrame
python
import geopandas as gpd

gdf = gpd.read_file("cities.geojson")  # EPSG:4326
gdf_mercator = gdf.to_crs(epsg=3857)   # EPSG:3857

# Now .area and .length return values in square meters and meters
print(gdf_mercator.geometry.area)  # metric values (note: Web Mercator inflates areas away from the equator)

# For accurate area calculations, use an equal-area projection
gdf_equal_area = gdf.to_crs(epsg=6933)  # NSIDC EASE-Grid 2.0 equal-area
print(gdf_equal_area.geometry.area / 1e6)  # km²

Transforming Individual Shapely Geometries
python
from pyproj import Transformer
from shapely.ops import transform as shapely_transform
from shapely.geometry import Polygon

transformer = Transformer.from_crs("EPSG:4326", "EPSG:32618", always_xy=True)
poly_wgs84 = Polygon([(-74.01, 40.71), (-73.97, 40.71), (-73.97, 40.75), (-74.01, 40.75)])
poly_utm = shapely_transform(transformer.transform, poly_wgs84)
print(f"Area in m²: {poly_utm.area:.0f}")  # accurate because distortion within UTM zone 18N is very small

Performance Tips
orjson for Large File Parsing
orjson is a Rust-backed JSON library that is 3–10x faster than the stdlib json module for parsing. Parsing a 100 MB GeoJSON file takes ~1.8 s with stdlib json.load() and ~0.4 s with orjson on typical hardware. Install with pip install orjson.
python
import orjson

with open("large.geojson", "rb") as f:  # open in binary mode for orjson
    data = orjson.loads(f.read())

# orjson also serializes 3-5x faster than json.dumps
output = orjson.dumps(data, option=orjson.OPT_INDENT_2)

GeoPandas .cx[] for Spatial Indexing
Never iterate over a GeoDataFrame to filter by bounding box. The .cx[] accessor performs the bounding-box selection as a vectorized Shapely 2.0 operation at C speed, and is orders of magnitude faster than a Python loop.
python
import geopandas as gpd

gdf = gpd.read_file("world_cities.geojson")  # 100,000 features

# SLOW: Python loop, 100,000 bounding box checks
slow = [f for f in gdf.itertuples() if -10 < f.geometry.x < 40 and 35 < f.geometry.y < 72]

# FAST: vectorized selection, milliseconds for most queries
fast = gdf.cx[-10:40, 35:72]  # [minx:maxx, miny:maxy]

Streaming Large Files with Fiona
For files over 500 MB, loading into a GeoDataFrame may exhaust RAM. Use Fiona's streaming reader and write results feature-by-feature.
python
import fiona

with fiona.open("input_500mb.fgb") as src, fiona.open(
    "output.fgb", "w", driver="FlatGeobuf", schema=src.schema, crs=src.crs
) as dst:
    for feature in src:
        if feature["properties"]["year"] >= 2020:
            dst.write(feature)
# Peak memory: one feature at a time, not 500 MB

GeoParquet for Analytical Workloads
GeoParquet stores geometries as WKB in a Parquet column, enabling columnar compression and predicate pushdown via DuckDB or pandas. A 1 GB GeoJSON file typically compresses to 80–150 MB as GeoParquet, and DuckDB can query it with spatial predicates without loading it fully into memory.
python
import geopandas as gpd

# Write GeoParquet (requires pyarrow: pip install pyarrow)
gdf = gpd.read_file("large.geojson")
gdf.to_parquet("output.parquet")  # GeoPandas 0.12+ native support

# Read back selectively with bbox filtering
gdf_back = gpd.read_parquet("output.parquet", bbox=(-74.1, 40.6, -73.7, 40.9))

# Query with DuckDB — no Python GeoDataFrame needed.
# GeoPandas writes the geometry column as "geometry" (WKB); decode it with ST_GeomFromWKB.
import duckdb
duckdb.execute("INSTALL spatial; LOAD spatial;")
result = duckdb.sql("""
    SELECT name, ST_Area(ST_GeomFromWKB(geometry)) AS area_deg2
    FROM read_parquet('output.parquet')
    WHERE population > 1000000
""").df()

Serving GeoJSON from Flask and FastAPI
Both frameworks can serve GeoJSON directly. The key detail is setting the correct Content-Type header (application/geo+json, registered with IANA in 2018) and enabling CORS for browser-based map clients.
Flask
python
from flask import Flask, Response, request
import geopandas as gpd

app = Flask(__name__)
gdf = gpd.read_file("cities.geojson")  # Load once at startup

@app.route("/api/cities")
def cities():
    bbox = request.args.get("bbox")  # "minx,miny,maxx,maxy"
    if bbox:
        minx, miny, maxx, maxy = map(float, bbox.split(","))
        result = gdf.cx[minx:maxx, miny:maxy]
    else:
        result = gdf
    geojson_str = result.to_json()
    return Response(geojson_str, mimetype="application/geo+json",
                    headers={"Access-Control-Allow-Origin": "*"})

FastAPI
python
from fastapi import FastAPI, Query
from fastapi.responses import Response
from typing import Optional
import geopandas as gpd

app = FastAPI()
gdf = gpd.read_file("cities.geojson")

@app.get("/api/cities")
async def get_cities(
    min_population: Optional[int] = Query(None, description="Minimum population filter"),
    bbox: Optional[str] = Query(None, description="minx,miny,maxx,maxy"),
):
    result = gdf.copy()
    if min_population is not None:
        result = result[result["population"] >= min_population]
    if bbox:
        minx, miny, maxx, maxy = map(float, bbox.split(","))
        result = result.cx[minx:maxx, miny:maxy]
    return Response(content=result.to_json(), media_type="application/geo+json")

Type Hints and Validation with geojson-pydantic
geojson-pydantic provides Pydantic v2 models for all GeoJSON types defined in RFC 7946. It validates structure at parse time and integrates with FastAPI's automatic OpenAPI schema generation. Install with pip install geojson-pydantic.
Parsing and Validating Input
python
from geojson_pydantic import Feature, FeatureCollection, Point, Polygon
from pydantic import ValidationError

raw = {
    "type": "Feature",
    "geometry": {"type": "Point", "coordinates": [-73.9857, 40.7484]},
    "properties": {"name": "Empire State Building"}
}

feature = Feature(**raw)
print(feature.geometry.coordinates)  # (-73.9857, 40.7484)
print(type(feature.geometry))        # <class 'geojson_pydantic.geometries.Point'>

# Validation catches RFC 7946 violations at parse time
try:
    bad = Point(type="Point", coordinates=[])  # empty coordinates
except ValidationError as e:
    print(e)  # List should have at least 2 items

FastAPI with Typed GeoJSON Request Body
python
from fastapi import FastAPI
from geojson_pydantic import FeatureCollection, Feature
from geojson_pydantic.geometries import Polygon as GeoPolygon

app = FastAPI()

@app.post("/api/features", response_model=FeatureCollection)
async def create_features(body: FeatureCollection) -> FeatureCollection:
    # body is fully validated against RFC 7946 — wrong types raise 422 automatically
    processed = []
    for feature in body.features:
        if isinstance(feature.geometry, GeoPolygon):
            # type narrowing works — IDE knows feature.geometry has .coordinates
            processed.append(feature)
    return FeatureCollection(type="FeatureCollection", features=processed)

Custom Properties with Generics
python
from geojson_pydantic import Feature, Point
from pydantic import BaseModel
from typing import Optional

class CityProperties(BaseModel):
    name: str
    population: int
    country: str
    elevation_m: Optional[float] = None

# Feature[Point, CityProperties] — geometry and properties are both typed
CityFeature = Feature[Point, CityProperties]

raw = {
    "type": "Feature",
    "geometry": {"type": "Point", "coordinates": [-73.9857, 40.7484]},
    "properties": {"name": "New York", "population": 8336817, "country": "US"}
}

city = CityFeature(**raw)
print(city.properties.population)  # 8336817 — typed int, not string