Working with GeoJSON in Python

Use GeoJSON in Python with json, Shapely, GeoPandas, and Fiona. Read, write, transform, and serve spatial data.

Reading GeoJSON with stdlib json

For files under ~50 MB where you need no geometry operations — just reading properties or iterating features — the standard library is sufficient. No dependencies, no installation, ships with every Python 3.x interpreter.

pythonimport json

with open("features.geojson") as f:
    data = json.load(f)

for feature in data["features"]:
    props = feature["properties"]
    coords = feature["geometry"]["coordinates"]
    print(props.get("name"), coords)

This pattern works until you need to query by location, compute areas, or reproject. The moment you write if coords[0] > x and coords[1] > y to filter by bounding box, stop and use GeoPandas instead — you are reimplementing spatial indexing by hand.

Shapely for Geometry Operations

Shapely (2.0+, built on GEOS 3.11+) is the core geometry library for Python. It handles creation, manipulation, and measurement of geometries without any I/O or CRS awareness. Install with pip install shapely.

Creating Geometries

pythonfrom shapely.geometry import Point, LineString, Polygon, shape

# From explicit coordinates
point = Point(-73.9857, 40.7484)
line = LineString([(-73.9857, 40.7484), (-74.0060, 40.7128)])
poly = Polygon([(-74.01, 40.71), (-73.97, 40.71), (-73.97, 40.75), (-74.01, 40.75)])

# From a GeoJSON geometry dict (use shape() for interoperability)
geojson_geom = {"type": "Point", "coordinates": [-73.9857, 40.7484]}
point_from_geojson = shape(geojson_geom)

Buffer

python# Buffer in degrees (EPSG:4326) — not meaningful for metric measurements
buffered = point.buffer(0.01)  # ~1 km at mid-latitudes, imprecise

# For accurate metric buffers, project first (see pyproj section below)
# then buffer in meters, then reproject back

Union and Intersection

pythonfrom shapely.geometry import Polygon
from shapely.ops import unary_union

poly_a = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])
poly_b = Polygon([(1, 1), (3, 1), (3, 3), (1, 3)])

intersection = poly_a.intersection(poly_b)  # Polygon covering (1,1)-(2,2)
union = poly_a.union(poly_b)                # L-shaped polygon
difference = poly_a.difference(poly_b)     # poly_a minus the overlap

# Merge a list of polygons into one
polygons = [poly_a, poly_b, Polygon([(4, 0), (6, 0), (6, 2), (4, 2)])]
merged = unary_union(polygons)

Area, Length, and Bounds

pythonprint(poly_a.area)        # 4.0 (in coordinate units — degrees if EPSG:4326)
print(line.length)        # Euclidean length in coordinate units
print(poly_a.bounds)      # (minx, miny, maxx, maxy) tuple
print(point.distance(Point(1, 1)))  # Euclidean distance

# Shapely 2.0 vectorized operations on arrays (much faster than looping)
import numpy as np
from shapely import area, buffer
from shapely.geometry import Point

points = np.array([Point(i, i) for i in range(1000)])

# Old style — one Python-level buffer/area call per geometry:
areas_loop = np.array([p.buffer(1).area for p in points])

# New style — single calls over the whole geometry array; the loop runs in C:
areas_vectorized = area(buffer(points, 1))
assert np.allclose(areas_loop, areas_vectorized)

GeoPandas for Tabular Spatial Data

GeoPandas wraps Shapely geometries in a Pandas DataFrame column, giving you SQL-like operations (filter, join, groupby) with spatial awareness. Install with pip install geopandas. GeoPandas 0.14+ uses Shapely 2.0 internally, which is 10–100x faster than the GEOS-via-ctypes path in older versions.

Reading and Writing GeoJSON

pythonimport geopandas as gpd

# Read GeoJSON (also reads Shapefile, GeoPackage, FlatGeobuf — same API)
gdf = gpd.read_file("cities.geojson")

print(gdf.crs)           # EPSG:4326 for RFC 7946 GeoJSON
print(gdf.shape)         # (n_features, n_columns)
print(gdf.dtypes)        # geometry column is 'geometry' dtype
print(gdf.head())

# Write back to GeoJSON
gdf.to_file("output.geojson", driver="GeoJSON")

# Write to GeoPackage (preferred for large datasets)
gdf.to_file("output.gpkg", driver="GPKG", layer="cities")

Filtering by Property

python# Standard Pandas boolean indexing works on all non-geometry columns
large_cities = gdf[gdf["population"] > 1_000_000]
us_cities = gdf[gdf["country"] == "US"]

# Chain conditions
major_us = gdf[(gdf["country"] == "US") & (gdf["population"] > 500_000)]

# Query syntax
west_coast = gdf.query("state in ['CA', 'OR', 'WA']")

# Reset index after filtering to avoid index gaps
major_us = major_us.reset_index(drop=True)

Spatial Joins

pythonimport geopandas as gpd

cities = gpd.read_file("cities.geojson")       # Points
regions = gpd.read_file("regions.geojson")     # Polygons

# Join cities to the region they fall within — adds region columns to cities
cities_with_region = cities.sjoin(regions, how="left", predicate="within")

# predicate options: "within", "intersects", "contains", "crosses", "touches"
# how options: "left", "right", "inner"

# Count cities per region
city_counts = cities_with_region.groupby("region_name").size().reset_index(name="city_count")

Dissolve

python# Merge all features sharing the same attribute value into one geometry
# Equivalent to SQL: SELECT country, ST_Union(geom), SUM(population) FROM cities GROUP BY country
dissolved = gdf.dissolve(by="country", aggfunc={"population": "sum", "name": "count"})

# Dissolve everything into a single feature (e.g., merge all districts into one boundary)
single = gdf.dissolve()  # no by= argument

Fiona for Low-Level I/O

Fiona is the I/O layer that GeoPandas uses internally, exposed directly for cases where you need streaming reads, schema inspection, or format control without loading everything into memory. Install with pip install fiona. Fiona 1.9+ uses GDAL 3.x and supports FlatGeobuf, GeoPackage, GeoJSON, Shapefile, and 80+ other drivers.

Schema Inspection

pythonimport fiona

with fiona.open("data.geojson") as src:
    print(src.driver)        # "GeoJSON"
    print(src.crs)           # {"init": "epsg:4326"} or CRS object
    print(src.schema)        # {"geometry": "Point", "properties": {"name": "str", "pop": "int"}}
    print(len(src))          # feature count (may require full scan for some formats)
    print(src.bounds)        # (minx, miny, maxx, maxy) of the entire layer

Streaming Read (Memory-Efficient)

import fiona

def iter_large_cities(path):
    """Yield features whose population exceeds 1,000,000, one at a time.

    Streaming keeps memory constant regardless of file size — only one
    feature is ever held in memory. (`yield` must live inside a function;
    a bare module-level `yield` is a SyntaxError.)
    """
    with fiona.open(path) as src:
        for feature in src:
            props = feature["properties"]
            if props["population"] > 1_000_000:
                yield feature  # or write to output, collect stats, etc.

big_cities = iter_large_cities("large_dataset.fgb")  # lazy — nothing read yet

Filtered Read by Bounding Box

pythonimport fiona

bbox = (-74.1, 40.6, -73.7, 40.9)  # (minx, miny, maxx, maxy) for NYC area

with fiona.open("countries.fgb") as src:
    # For FlatGeobuf and GeoPackage, this uses the spatial index — fast
    # For GeoJSON and Shapefile, it falls back to a full scan — slow
    for feature in src.filter(bbox=bbox):
        print(feature["properties"]["name"])

Writing with Format Control

import fiona

schema = {
    "geometry": "Point",
    "properties": {"name": "str", "elevation_m": "float"}
}

# Pass the CRS as an "EPSG:xxxx" string — fiona.crs.from_epsg() is
# deprecated in Fiona 1.9+ (the version range this article targets).
with fiona.open("output.gpkg", "w", driver="GPKG", schema=schema, crs="EPSG:4326") as dst:
    dst.write({
        "geometry": {"type": "Point", "coordinates": [-73.9857, 40.7484]},
        "properties": {"name": "Empire State Building", "elevation_m": 443.2}
    })

Coordinate Transformations with pyproj

pyproj wraps the PROJ library and handles all CRS transformations. Install with pip install pyproj. The Transformer class in pyproj 3.x is the correct API — the older Proj and transform() function API is deprecated.

EPSG:4326 to EPSG:3857 (Web Mercator)

from pyproj import Transformer

# always_xy=True forces (longitude, latitude) input order regardless of CRS axis order
transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)

# Single point: (longitude, latitude) -> (easting, northing) in meters
easting, northing = transformer.transform(-73.9857, 40.7484)
# Easting is linear: lon_deg * 20037508.34 / 180, so -73.9857° -> ~-8236050 m
print(f"{easting:.2f}, {northing:.2f}")  # ≈ -8236050.4, ≈ 4975310

# Inverse: EPSG:3857 back to EPSG:4326
inv_transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)
lon, lat = inv_transformer.transform(easting, northing)
print(f"{lon:.6f}, {lat:.6f}")  # -73.985700, 40.748400 — exact round trip

Reprojecting an Entire GeoDataFrame

pythonimport geopandas as gpd

gdf = gpd.read_file("cities.geojson")  # EPSG:4326
gdf_mercator = gdf.to_crs(epsg=3857)  # EPSG:3857

# Now .area and .length return values in square meters and meters
print(gdf_mercator.geometry.area)  # meaningful metric values

# For accurate area calculations, use an equal-area projection
gdf_equal_area = gdf.to_crs(epsg=6933)  # NSIDC EASE-Grid 2.0 equal-area
print(gdf_equal_area.geometry.area / 1e6)  # km²

Transforming Individual Shapely Geometries

pythonfrom pyproj import Transformer
from shapely.ops import transform as shapely_transform
from shapely.geometry import Polygon

transformer = Transformer.from_crs("EPSG:4326", "EPSG:32618", always_xy=True)

poly_wgs84 = Polygon([(-74.01, 40.71), (-73.97, 40.71), (-73.97, 40.75), (-74.01, 40.75)])
poly_utm = shapely_transform(transformer.transform, poly_wgs84)

print(f"Area in m²: {poly_utm.area:.0f}")  # accurate because UTM zone 18N is conformal

Performance Tips

orjson for Large File Parsing

orjson is a Rust-backed JSON library that is 3–10x faster than the stdlib json module for parsing. Parsing a 100 MB GeoJSON file takes ~1.8 s with stdlib json.load() and ~0.4 s with orjson on typical hardware. Install with pip install orjson.

pythonimport orjson

with open("large.geojson", "rb") as f:  # open in binary mode for orjson
    data = orjson.loads(f.read())

# orjson also serializes 3-5x faster than json.dumps
output = orjson.dumps(data, option=orjson.OPT_INDENT_2)

GeoPandas .cx[] for Spatial Indexing

Never iterate over a GeoDataFrame to filter by bounding box. The .cx[] accessor uses the built-in STRtree spatial index (Shapely 2.0) and is orders of magnitude faster.

pythonimport geopandas as gpd

gdf = gpd.read_file("world_cities.geojson")  # 100,000 features

# SLOW: Python loop, 100,000 bounding box checks
slow = [f for f in gdf.itertuples() if -10 < f.geometry.x < 40 and 35 < f.geometry.y < 72]

# FAST: STRtree index lookup, sub-millisecond for most queries
fast = gdf.cx[-10:40, 35:72]  # [minx:maxx, miny:maxy]

Streaming Large Files with Fiona

For files over 500 MB, loading into a GeoDataFrame may exhaust RAM. Use Fiona's streaming reader and write results feature-by-feature.

import fiona

# Open input and output together; copy matching features one at a time.
with fiona.open("input_500mb.fgb") as src, fiona.open(
    "output.fgb", "w", driver="FlatGeobuf", schema=src.schema, crs=src.crs
) as dst:
    for feature in src:
        if feature["properties"]["year"] >= 2020:
            dst.write(feature)
# Peak memory: one feature at a time, not 500 MB

GeoParquet for Analytical Workloads

GeoParquet stores geometries as WKB in a Parquet column, enabling columnar compression and predicate pushdown via DuckDB or pandas. A 1 GB GeoJSON file typically compresses to 80–150 MB as GeoParquet, and DuckDB can query it with spatial predicates without loading it fully into memory.

pythonimport geopandas as gpd

# Write GeoParquet (requires pyarrow: pip install pyarrow)
gdf = gpd.read_file("large.geojson")
gdf.to_parquet("output.parquet")  # GeoPandas 0.12+ native support

# Read back selectively with row group filtering
gdf_back = gpd.read_parquet("output.parquet", bbox=(-74.1, 40.6, -73.7, 40.9))

# Query with DuckDB — no Python GeoDataFrame needed
import duckdb
duckdb.execute("INSTALL spatial; LOAD spatial;")
result = duckdb.sql("""
    SELECT name, ST_Area(geom) as area_deg2
    FROM read_parquet('output.parquet')
    WHERE population > 1000000
""").df()

Serving GeoJSON from Flask and FastAPI

Both frameworks can serve GeoJSON directly. The key detail is setting the correct Content-Type header (application/geo+json, registered with IANA in 2018) and enabling CORS for browser-based map clients.

Flask

from flask import Flask, Response, request
import geopandas as gpd

app = Flask(__name__)
gdf = gpd.read_file("cities.geojson")  # Load once at startup

@app.route("/api/cities")
def cities():
    """Serve cities as GeoJSON, optionally clipped to a bounding box."""
    bbox = request.args.get("bbox")  # "minx,miny,maxx,maxy"
    if bbox:
        try:
            minx, miny, maxx, maxy = map(float, bbox.split(","))
        except ValueError:
            # Malformed bbox is a client error, not a 500
            return Response('{"error": "bbox must be minx,miny,maxx,maxy"}',
                            status=400, mimetype="application/json")
        result = gdf.cx[minx:maxx, miny:maxy]
    else:
        result = gdf
    geojson_str = result.to_json()
    # application/geo+json is the IANA-registered GeoJSON media type
    return Response(geojson_str, mimetype="application/geo+json",
                    headers={"Access-Control-Allow-Origin": "*"})

FastAPI

from fastapi import FastAPI, Query
from fastapi.responses import Response
from typing import Optional
import geopandas as gpd

app = FastAPI()
gdf = gpd.read_file("cities.geojson")

@app.get("/api/cities")
async def get_cities(
    min_population: Optional[int] = Query(None, description="Minimum population filter"),
    bbox: Optional[str] = Query(None, description="minx,miny,maxx,maxy"),
):
    """Serve cities as GeoJSON with optional population and bbox filters."""
    # No .copy() needed — boolean indexing and .cx return new frames.
    result = gdf
    # Compare against None explicitly: `if min_population:` would silently
    # skip the filter when the client passes min_population=0.
    if min_population is not None:
        result = result[result["population"] >= min_population]
    if bbox:
        minx, miny, maxx, maxy = map(float, bbox.split(","))
        result = result.cx[minx:maxx, miny:maxy]
    return Response(content=result.to_json(), media_type="application/geo+json")

Type Hints and Validation with geojson-pydantic

geojson-pydantic provides Pydantic v2 models for all GeoJSON types defined in RFC 7946. It validates structure at parse time and integrates with FastAPI's automatic OpenAPI schema generation. Install with pip install geojson-pydantic.

Parsing and Validating Input

pythonfrom geojson_pydantic import Feature, FeatureCollection, Point, Polygon
from pydantic import ValidationError

raw = {
    "type": "Feature",
    "geometry": {"type": "Point", "coordinates": [-73.9857, 40.7484]},
    "properties": {"name": "Empire State Building"}
}

feature = Feature(**raw)
print(feature.geometry.coordinates)  # (-73.9857, 40.7484)
print(type(feature.geometry))        # <class 'geojson_pydantic.geometries.Point'>

# Validation catches RFC 7946 violations at parse time
try:
    bad = Point(type="Point", coordinates=[])  # empty coordinates
except ValidationError as e:
    print(e)  # List should have at least 2 items

FastAPI with Typed GeoJSON Request Body

pythonfrom fastapi import FastAPI
from geojson_pydantic import FeatureCollection, Feature
from geojson_pydantic.geometries import Polygon as GeoPolygon

app = FastAPI()

@app.post("/api/features", response_model=FeatureCollection)
async def create_features(body: FeatureCollection) -> FeatureCollection:
    # body is fully validated against RFC 7946 — wrong types raise 422 automatically
    processed = []
    for feature in body.features:
        if isinstance(feature.geometry, GeoPolygon):
            # type narrowing works — IDE knows feature.geometry has .coordinates
            processed.append(feature)
    return FeatureCollection(type="FeatureCollection", features=processed)

Custom Properties with Generics

from geojson_pydantic import Feature, Point
from pydantic import BaseModel
from typing import Optional

class CityProperties(BaseModel):
    # Typed schema for a city feature's "properties" member
    name: str
    population: int
    country: str
    elevation_m: Optional[float] = None

# Feature[Point, CityProperties] — geometry and properties are both typed.
# (Parameterizing with `dict` instead of `Point` would bypass RFC 7946
# geometry validation entirely.)
CityFeature = Feature[Point, CityProperties]

raw = {
    "type": "Feature",
    "geometry": {"type": "Point", "coordinates": [-73.9857, 40.7484]},
    "properties": {"name": "New York", "population": 8336817, "country": "US"}
}

city = CityFeature(**raw)
print(city.properties.population)   # 8336817 — typed int, not string
print(city.geometry.coordinates)    # validated Point, not a raw dict