BigQueryでST_INTERSECTSを使って苦労したことを綴ります。
失敗1:
逐次、SELECT文で結果を呼び出しているので、時間がかかる
SQL
with
/* -----------------------------------------------------------------------------
-- 入力テーブル指定
------------------------------------------------------------------------------*/
-- facilitys as (select distinct facility_id from ${dataset_base}.process_facility)
area_shp AS (SELECT * FROM `area_shp`),
centroid AS (SELECT * FROM `centroid`)
/* -----------------------------------------------------------------------------
-- citycodeを変数
------------------------------------------------------------------------------*/
/* -----------------------------------------------------------------------------
-- citycodeを付与
------------------------------------------------------------------------------*/
SELECT
dailyid, daily_group_id, in_time, out_time
, centroid_lat, centroid_lon,
CASE
-- 40131 東区
WHEN
ST_Intersects(st_geogpoint(centroid_lon, centroid_lat)
,ST_GEOGFROMTEXT((SELECT geometry FROM area_shp WHERE ward_name="東区"))
)
THEN 40131
-- 40132 博多区
WHEN
ST_Intersects(st_geogpoint(centroid_lon, centroid_lat)
,ST_GEOGFROMTEXT((SELECT geometry FROM area_shp WHERE ward_name="博多区"))
)
THEN 40132
-- 40133 中央区
WHEN
ST_Intersects(st_geogpoint(centroid_lon, centroid_lat)
,ST_GEOGFROMTEXT((SELECT geometry FROM area_shp WHERE ward_name="中央区"))
)
THEN 40133
-- 40134 南区
WHEN
ST_Intersects(st_geogpoint(centroid_lon, centroid_lat)
,ST_GEOGFROMTEXT((SELECT geometry FROM area_shp WHERE ward_name="南区"))
)
THEN 40134
-- 40135 西区
WHEN
ST_Intersects(st_geogpoint(centroid_lon, centroid_lat)
,ST_GEOGFROMTEXT((SELECT geometry FROM area_shp WHERE ward_name="西区"))
)
THEN 40135
-- 40136 城南区
WHEN
ST_Intersects(st_geogpoint(centroid_lon, centroid_lat)
,ST_GEOGFROMTEXT((SELECT geometry FROM area_shp WHERE ward_name="城南区"))
)
THEN 40136
-- 40137 早良区
WHEN
ST_Intersects(st_geogpoint(centroid_lon, centroid_lat)
,ST_GEOGFROMTEXT((SELECT geometry FROM area_shp WHERE ward_name="早良区"))
)
THEN 40137
-- その他
ELSE 99999
END AS city_code
FROM
centroid
ORDER BY dailyid, in_time
失敗1:
SETで変数化しようとしたが、メモリーオーバーした
SQL
/* -----------------------------------------------------------------------------
-- 地理情報を変数化
------------------------------------------------------------------------------*/
DECLARE wards ARRAY<GEOGRAPHY>;
-- 変数に値を設定
SET wards = (SELECT ARRAY_AGG(ST_GEOGFROMTEXT(geometry)) FROM `area_shp` WHERE ward_name!="福岡市");
with
/* -----------------------------------------------------------------------------
-- 入力テーブル指定
------------------------------------------------------------------------------*/
-- area_shp AS (SELECT * FROM `area_shp`),
centroid AS (SELECT * FROM `centroid`)
/* -----------------------------------------------------------------------------
-- citycodeを付与
------------------------------------------------------------------------------*/
SELECT
dailyid, daily_group_id, in_time, out_time
, centroid_lat, centroid_lon,
CASE
-- 40133 中央区
WHEN
ST_Intersects(st_geogpoint(centroid_lon, centroid_lat),wards[ORDINAL(0)])
THEN 40133
-- 40134 南区
WHEN
ST_Intersects(st_geogpoint(centroid_lon, centroid_lat),wards[ORDINAL(1)])
THEN 40134
-- 40132 博多区
WHEN
ST_Intersects(st_geogpoint(centroid_lon, centroid_lat),wards[ORDINAL(2)])
THEN 40132
-- 40136 城南区
WHEN
ST_Intersects(st_geogpoint(centroid_lon, centroid_lat),wards[ORDINAL(3)])
THEN 40136
-- 40137 早良区
WHEN
ST_Intersects(st_geogpoint(centroid_lon, centroid_lat),wards[ORDINAL(4)])
THEN 40137
-- 40131 東区
WHEN
ST_Intersects(st_geogpoint(centroid_lon, centroid_lat),wards[ORDINAL(5)])
THEN 40131
-- 40135 西区
WHEN
ST_Intersects(st_geogpoint(centroid_lon, centroid_lat),wards[ORDINAL(6)])
THEN 40135
-- その他
ELSE 99999
END AS city_code
FROM
centroid
ORDER BY dailyid, in_time
成功
JOIN文で条件結合を実行する
SQL
with
/* -----------------------------------------------------------------------------
-- 入力テーブル指定
------------------------------------------------------------------------------*/
area_shp AS (SELECT * FROM `area_shp`),
centroid AS (SELECT * FROM `centroid`)
/* -----------------------------------------------------------------------------
-- citycodeを付与
------------------------------------------------------------------------------*/
SELECT
t1.dailyid, t1.daily_group_id, t1.in_time, t1.out_time
, t1.lat, t1.lon
, st_geogpoint(t1.lon, t1.lat) AS point
, t2.S_NAME AS s_name,
FROM
centroid AS t1
JOIN
area_shp AS t2
ON
ST_INTERSECTS(
st_geogpoint(t1.lon, t1.lat), ST_GEOGFROMTEXT(t2.geometry)
)
ORDER BY dailyid, in_time
まとめ
BigQueryでST_INTERSECTSを使って苦労したことを綴りました.
参考サイト
Efficient spatial matching in BigQuery
BigQuey 地理関数
BigQuery GISを用いた位置情報データ分析の入門
BigQuery 欠損値補完
BigQuery ハッシュ関数
BigQuery タイムスタンプ関数
BigQuery 文字列の結合
BigQuery 日時情報
BigQuery 配列関数
地理空間データを可視化
国土数値情報_行政区域データ
Pandas dataframe to Shapely LineString using GroupBy & SortBy
geopandas でshapefileをgeojsonに変換する
Geopandas: how to convert the column geometry to string?