Skip to content

BigQueryでST_INTERSECTSを使って苦労したことを綴ります。

失敗1:

逐次、SELECT文で結果を呼び出しているので、時間がかかる

SQL
with
/* -----------------------------------------------------------------------------
-- 入力テーブル指定
------------------------------------------------------------------------------*/
--  facilitys as (select distinct facility_id from ${dataset_base}.process_facility)
 area_shp AS (SELECT * FROM `area_shp`),
 centroid AS (SELECT * FROM `centroid`)

/* -----------------------------------------------------------------------------
-- citycodeを変数
------------------------------------------------------------------------------*/

/* -----------------------------------------------------------------------------
-- citycodeを付与
------------------------------------------------------------------------------*/
SELECT 
  dailyid, daily_group_id, in_time, out_time
  , centroid_lat, centroid_lon,
  CASE
    -- 40131	 	東区
    WHEN 
      ST_Intersects(st_geogpoint(centroid_lon, centroid_lat)
                    ,ST_GEOGFROMTEXT((SELECT geometry FROM area_shp WHERE ward_name="東区"))
                    )
      THEN 40131
    -- 40132	 	博多区
    WHEN 
      ST_Intersects(st_geogpoint(centroid_lon, centroid_lat)
                    ,ST_GEOGFROMTEXT((SELECT geometry FROM area_shp WHERE ward_name="博多区"))
                    )
      THEN 40132
    -- 40133	 	中央区
    WHEN 
      ST_Intersects(st_geogpoint(centroid_lon, centroid_lat)
                    ,ST_GEOGFROMTEXT((SELECT geometry FROM area_shp WHERE ward_name="中央区"))
                    )
      THEN 40133
    -- 40134	 	南区
    WHEN 
      ST_Intersects(st_geogpoint(centroid_lon, centroid_lat)
                    ,ST_GEOGFROMTEXT((SELECT geometry FROM area_shp WHERE ward_name="南区"))
                    )
      THEN 40134
    -- 40135	 	西区
    WHEN 
      ST_Intersects(st_geogpoint(centroid_lon, centroid_lat)
                    ,ST_GEOGFROMTEXT((SELECT geometry FROM area_shp WHERE ward_name="西区"))
                    )
      THEN 40135
    -- 40136	 	城南区
    WHEN 
      ST_Intersects(st_geogpoint(centroid_lon, centroid_lat)
                    ,ST_GEOGFROMTEXT((SELECT geometry FROM area_shp WHERE ward_name="城南区"))
                    )
      THEN 40136
    -- 40137	 	早良区
    WHEN 
      ST_Intersects(st_geogpoint(centroid_lon, centroid_lat)
                    ,ST_GEOGFROMTEXT((SELECT geometry FROM area_shp WHERE ward_name="早良区"))
                    )
      THEN 40137
    -- その他
    ELSE 99999
  END AS city_code
FROM
  centroid
ORDER BY dailyid, in_time

失敗1:

SETで変数化しようとしたが、メモリーオーバーした

SQL
/* -----------------------------------------------------------------------------
-- 地理情報を変数化
------------------------------------------------------------------------------*/
DECLARE wards ARRAY<GEOGRAPHY>;
-- 変数に値を設定
SET wards = (SELECT ARRAY_AGG(ST_GEOGFROMTEXT(geometry)) FROM `area_shp` WHERE ward_name!="福岡市");

with
/* -----------------------------------------------------------------------------
-- 入力テーブル指定
------------------------------------------------------------------------------*/
 --  area_shp AS (SELECT * FROM `area_shp`),
 centroid AS (SELECT * FROM `centroid`)

/* -----------------------------------------------------------------------------
-- citycodeを付与
------------------------------------------------------------------------------*/
SELECT 
  dailyid, daily_group_id, in_time, out_time
  , centroid_lat, centroid_lon,
  CASE
    -- 40133	 	中央区
    WHEN 
      ST_Intersects(st_geogpoint(centroid_lon, centroid_lat),wards[ORDINAL(0)])
      THEN 40133
    -- 40134	 	南区
    WHEN 
      ST_Intersects(st_geogpoint(centroid_lon, centroid_lat),wards[ORDINAL(1)])
      THEN 40134
    -- 40132	 	博多区
    WHEN 
      ST_Intersects(st_geogpoint(centroid_lon, centroid_lat),wards[ORDINAL(2)])
      THEN 40132
    -- 40136	 	城南区
    WHEN 
      ST_Intersects(st_geogpoint(centroid_lon, centroid_lat),wards[ORDINAL(3)])
      THEN 40136
    -- 40137	 	早良区
    WHEN 
      ST_Intersects(st_geogpoint(centroid_lon, centroid_lat),wards[ORDINAL(4)])
      THEN 40137
    -- 40131	 	東区
    WHEN 
      ST_Intersects(st_geogpoint(centroid_lon, centroid_lat),wards[ORDINAL(5)])
      THEN 40131
    -- 40135	 	西区
    WHEN 
      ST_Intersects(st_geogpoint(centroid_lon, centroid_lat),wards[ORDINAL(6)])
      THEN 40135
    -- その他
    ELSE 99999
  END AS city_code
FROM
  centroid
ORDER BY dailyid, in_time

成功

JOIN文で条件結合を実行する

SQL
with
/* -----------------------------------------------------------------------------
-- 入力テーブル指定
------------------------------------------------------------------------------*/
  area_shp AS (SELECT * FROM `area_shp`),
  centroid AS (SELECT * FROM `centroid`)

/* -----------------------------------------------------------------------------
-- citycodeを付与
------------------------------------------------------------------------------*/
SELECT 
  t1.dailyid, t1.daily_group_id, t1.in_time, t1.out_time
  , t1.lat, t1.lon
  , st_geogpoint(t1.lon, t1.lat) AS point
  , t2.S_NAME AS s_name,
FROM
  centroid AS t1
JOIN
  area_shp AS t2
ON
  ST_INTERSECTS(
    st_geogpoint(t1.lon, t1.lat), ST_GEOGFROMTEXT(t2.geometry)
  )
ORDER BY dailyid, in_time

まとめ

BigQueryでST_INTERSECTSを使って苦労したことを綴りました.

参考サイト

Efficient spatial matching in BigQuery
BigQuey 地理関数
BigQuery GISを用いた位置情報データ分析の入門
BigQuery 欠損値補完
BigQuery ハッシュ関数
BigQuery タイムスタンプ関数
BigQuery 文字列の結合
BigQuery 日時情報
BigQuery 配列関数
地理空間データを可視化
国土数値情報_行政区域データ
Pandas dataframe to Shapely LineString using GroupBy & SortBy
geopandas でshapefileをgeojsonに変換する
Geopandas: how to convert the column geometry to string?