고급 쿼리
컬렉션을 사용해 정리하기
내 환경설정을 기준으로 콘텐츠를 저장하고 분류하세요.
이 페이지의 고급 쿼리는
Google 애널리틱스. 유니버설 애널리틱스용 동일한 리소스를 찾고 있다면 유니버설 애널리틱스용 BigQuery 설명서를 참고하세요. 고급 쿼리를 사용하기 전에 먼저 기본 쿼리를 사용해 보세요.
특정 제품을 구매한 고객이 구매한 제품
다음 쿼리는 특정 제품을 구매한 고객이 구매한 다른 제품을 보여줍니다. 이 예에서는 제품을 동일한 순서로 구매했다고 가정하지 않습니다.
최적화된 예는 BigQuery 스크립팅 기능을 사용하여 필터링할 상품을 선언하는 변수를 정의합니다. 이렇게 하면 성능이 향상되지는 않지만 WITH
절을 사용하여 단일 값 표를 만드는 데 비해 더 읽기 쉬운 방법으로 변수를 정의할 수 있습니다. 단순화된 쿼리는 WITH
절을 사용하는 후자의 접근 방식을 사용합니다.
단순화된 쿼리는 별도의 '제품 A 구매자' 목록을 만들어 이 데이터와 결합합니다. 대신 최적화된 쿼리는 ARRAY_AGG
함수를 사용하여 사용자가 여러 주문을 통해 구매한 모든 상품의 목록을 만듭니다. 그런 다음 외부 WHERE
절을 사용하여 모든 사용자의 구매 목록이 target_item
에 대해 필터링되고 관련 상품만 표시됩니다.
단순화된 쿼리
-- Example: Products purchased by customers who purchased a specific product.
--
-- `Params` is used to hold the value of the selected product and is referenced
-- throughout the query.
WITH
Params AS (
-- Replace with selected item_name or item_id.
SELECT 'Google Navy Speckled Tee' AS selected_product
),
PurchaseEvents AS (
SELECT
user_pseudo_id,
items
FROM
-- Replace table name.
`bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE
-- Replace date range.
_TABLE_SUFFIX BETWEEN '20201101' AND '20210131'
AND event_name = 'purchase'
),
ProductABuyers AS (
SELECT DISTINCT
user_pseudo_id
FROM
Params,
PurchaseEvents,
UNNEST(items) AS items
WHERE
-- item.item_id can be used instead of items.item_name.
items.item_name = selected_product
)
SELECT
items.item_name AS item_name,
SUM(items.quantity) AS item_quantity
FROM
Params,
PurchaseEvents,
UNNEST(items) AS items
WHERE
user_pseudo_id IN (SELECT user_pseudo_id FROM ProductABuyers)
-- item.item_id can be used instead of items.item_name
AND items.item_name != selected_product
GROUP BY 1
ORDER BY item_quantity DESC;
최적화됨
-- Optimized Example: Products purchased by customers who purchased a specific product.
-- Replace item name
DECLARE target_item STRING DEFAULT 'Google Navy Speckled Tee';
SELECT
IL.item_name AS item_name,
SUM(IL.quantity) AS quantity
FROM
(
SELECT
user_pseudo_id,
ARRAY_AGG(STRUCT(item_name, quantity)) AS item_list
FROM
-- Replace table
`bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items)
WHERE
-- Replace date range
_TABLE_SUFFIX BETWEEN '20201201' AND '20201210'
AND event_name = 'purchase'
GROUP BY
1
),
UNNEST(item_list) AS IL
WHERE
target_item IN (SELECT item_name FROM UNNEST(item_list))
-- Remove the following line if you want the target_item to appear in the results
AND target_item != IL.item_name
GROUP BY
item_name
ORDER BY
quantity DESC;
사용자별 구매 세션당 평균 지출 금액
다음 쿼리는 사용자별 세션당 평균 지출 금액을 보여줍니다. 여기에는 사용자가 상품을 구매한 세션만 반영됩니다.
-- Example: Average amount of money spent per purchase session by user.
WITH
events AS (
SELECT
session.value.int_value AS session_id,
COALESCE(spend.value.int_value, spend.value.float_value, spend.value.double_value, 0.0)
AS spend_value,
event.*
-- Replace table name
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*` AS event
LEFT JOIN UNNEST(event.event_params) AS session
ON session.key = 'ga_session_id'
LEFT JOIN UNNEST(event.event_params) AS spend
ON spend.key = 'value'
-- Replace date range
WHERE _TABLE_SUFFIX BETWEEN '20201101' AND '20210131'
)
SELECT
user_pseudo_id,
COUNT(DISTINCT session_id) AS session_count,
SUM(spend_value) / COUNT(DISTINCT session_id) AS avg_spend_per_session_by_user
FROM events
WHERE event_name = 'purchase' and session_id IS NOT NULL
GROUP BY user_pseudo_id
사용자의 최신 세션 ID 및 세션 번호
다음 쿼리는 사용자 목록에서 최근 4일 동안의 최신 ga_session_id 및 ga_session_number 목록을 제공합니다. user_pseudo_id
목록 또는 user_id
목록을 제공할 수 있습니다.
user_pseudo_id
-- Get the latest ga_session_id and ga_session_number for specific users during last 4 days.
-- Replace timezone. List at https://en.wikipedia.org/wiki/List_of_tz_database_time_zones.
DECLARE REPORTING_TIMEZONE STRING DEFAULT 'America/Los_Angeles';
-- Replace list of user_pseudo_id's with ones you want to query.
DECLARE USER_PSEUDO_ID_LIST ARRAY<STRING> DEFAULT
[
'1005355938.1632145814', '979622592.1632496588', '1101478530.1632831095'];
CREATE TEMP FUNCTION GetParamValue(params ANY TYPE, target_key STRING)
AS (
(SELECT `value` FROM UNNEST(params) WHERE key = target_key LIMIT 1)
);
CREATE TEMP FUNCTION GetDateSuffix(date_shift INT64, timezone STRING)
AS (
(SELECT FORMAT_DATE('%Y%m%d', DATE_ADD(CURRENT_DATE(timezone), INTERVAL date_shift DAY)))
);
SELECT DISTINCT
user_pseudo_id,
FIRST_VALUE(GetParamValue(event_params, 'ga_session_id').int_value)
OVER (UserWindow) AS ga_session_id,
FIRST_VALUE(GetParamValue(event_params, 'ga_session_number').int_value)
OVER (UserWindow) AS ga_session_number
FROM
-- Replace table name.
`bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE
user_pseudo_id IN UNNEST(USER_PSEUDO_ID_LIST)
AND RIGHT(_TABLE_SUFFIX, 8)
BETWEEN GetDateSuffix(-3, REPORTING_TIMEZONE)
AND GetDateSuffix(0, REPORTING_TIMEZONE)
WINDOW UserWindow AS (PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC);
user_id
-- Get the latest ga_session_id and ga_session_number for specific users during last 4 days.
-- Replace timezone. List at https://en.wikipedia.org/wiki/List_of_tz_database_time_zones.
DECLARE REPORTING_TIMEZONE STRING DEFAULT 'America/Los_Angeles';
-- Replace list of user_id's with ones you want to query.
DECLARE USER_ID_LIST ARRAY<STRING> DEFAULT ['<user_id_1>', '<user_id_2>', '<user_id_n>'];
CREATE TEMP FUNCTION GetParamValue(params ANY TYPE, target_key STRING)
AS (
(SELECT `value` FROM UNNEST(params) WHERE key = target_key LIMIT 1)
);
CREATE TEMP FUNCTION GetDateSuffix(date_shift INT64, timezone STRING)
AS (
(SELECT FORMAT_DATE('%Y%m%d', DATE_ADD(CURRENT_DATE(timezone), INTERVAL date_shift DAY)))
);
SELECT DISTINCT
user_pseudo_id,
FIRST_VALUE(GetParamValue(event_params, 'ga_session_id').int_value)
OVER (UserWindow) AS ga_session_id,
FIRST_VALUE(GetParamValue(event_params, 'ga_session_number').int_value)
OVER (UserWindow) AS ga_session_number
FROM
-- Replace table name.
`bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE
user_id IN UNNEST(USER_ID_LIST)
AND RIGHT(_TABLE_SUFFIX, 8)
BETWEEN GetDateSuffix(-3, REPORTING_TIMEZONE)
AND GetDateSuffix(0, REPORTING_TIMEZONE)
WINDOW UserWindow AS (PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC);
달리 명시되지 않는 한 이 페이지의 콘텐츠에는 Creative Commons Attribution 4.0 라이선스에 따라 라이선스가 부여되며, 코드 샘플에는 Apache 2.0 라이선스에 따라 라이선스가 부여됩니다. 자세한 내용은 Google Developers 사이트 정책을 참조하세요. 자바는 Oracle 및/또는 Oracle 계열사의 등록 상표입니다.
최종 업데이트: 2024-09-12(UTC)
[null,null,["최종 업데이트: 2024-09-12(UTC)"],[[["\u003cp\u003eThis page provides advanced BigQuery queries for analyzing Google Analytics 4 event export data, going beyond basic queries.\u003c/p\u003e\n"],["\u003cp\u003eIt includes queries to identify products frequently purchased together, calculate average spending per purchase session, and retrieve the latest session information for specific users.\u003c/p\u003e\n"],["\u003cp\u003eThe queries are demonstrated with examples and explanations, including simplified and optimized versions where applicable.\u003c/p\u003e\n"],["\u003cp\u003eBefore using these advanced queries, it's recommended to familiarize yourself with the basic BigQuery queries for Google Analytics 4.\u003c/p\u003e\n"],["\u003cp\u003eUsers of Universal Analytics can find similar resources in the BigQuery cookbook for Universal Analytics linked on the page.\u003c/p\u003e\n"]]],["This document provides advanced BigQuery queries for Google Analytics event data. It details how to identify other products purchased by customers who bought a specific item, offering both simplified and optimized query examples that filter purchase lists. Another query calculates the average amount spent per purchase session per user. Lastly, it outlines how to retrieve the latest session ID and number for users, with examples for both `user_pseudo_id` and `user_id` lists.\n"],null,["# Advanced queries\n\nThe advanced queries in this page apply to the BigQuery event export data for\nGoogle Analytics. See [BigQuery cookbook for Universal Analytics](https://support.google.com/analytics/answer/4419694) if you are\nlooking for the same resource for Universal Analytics. Try the [basic queries](/analytics/bigquery/basic-queries)\nfirst before trying out the advanced ones.\n\n### Products purchased by customers who purchased a certain product\n\nThe following query shows what other products were purchased by customers who\npurchased a specific product. This example does not assume that the products\nwere purchased in the same order.\n\nThe optimized example relies on BigQuery scripting features to define a variable\nthat declares which items to filter on. While this does not improve performance,\nthis is a more readable approach for defining variables compared creating a\nsingle value table using a `WITH` clause. The simplified query uses the latter\napproach using the `WITH` clause.\n\nThe simplified query creats a separate list of \"Product A buyers\" and does a\njoin with that data. The optimized query, instead, creates a list of all items a\nuser has purchased across orders using the `ARRAY_AGG` function. Then using the\nouter `WHERE` clause, purchase lists across all users are filtered for the\n`target_item` and only relevant items are shown. \n\n### Simplified\n\n -- Example: Products purchased by customers who purchased a specific product.\n --\n -- `Params` is used to hold the value of the selected product and is referenced\n -- throughout the query.\n\n WITH\n Params AS (\n -- Replace with selected item_name or item_id.\n SELECT 'Google Navy Speckled Tee' AS selected_product\n ),\n PurchaseEvents AS (\n SELECT\n user_pseudo_id,\n items\n FROM\n -- Replace table name.\n `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`\n WHERE\n -- Replace date range.\n _TABLE_SUFFIX BETWEEN '20201101' AND '20210131'\n AND event_name = 'purchase'\n ),\n ProductABuyers AS (\n SELECT DISTINCT\n user_pseudo_id\n FROM\n Params,\n PurchaseEvents,\n UNNEST(items) AS items\n WHERE\n -- item.item_id can be used instead of items.item_name.\n items.item_name = selected_product\n )\n SELECT\n items.item_name AS item_name,\n SUM(items.quantity) AS item_quantity\n FROM\n Params,\n PurchaseEvents,\n UNNEST(items) AS items\n WHERE\n user_pseudo_id IN (SELECT user_pseudo_id FROM ProductABuyers)\n -- item.item_id can be used instead of items.item_name\n AND items.item_name != selected_product\n GROUP BY 1\n ORDER BY item_quantity DESC;\n\n### Optimized\n\n -- Optimized Example: Products purchased by customers who purchased a specific product.\n\n -- Replace item name\n DECLARE target_item STRING DEFAULT 'Google Navy Speckled Tee';\n\n SELECT\n IL.item_name AS item_name,\n SUM(IL.quantity) AS quantity\n FROM\n (\n SELECT\n user_pseudo_id,\n ARRAY_AGG(STRUCT(item_name, quantity)) AS item_list\n FROM\n -- Replace table\n `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`, UNNEST(items)\n WHERE\n -- Replace date range\n _TABLE_SUFFIX BETWEEN '20201201' AND '20201210'\n AND event_name = 'purchase'\n GROUP BY\n 1\n ),\n UNNEST(item_list) AS IL\n WHERE\n target_item IN (SELECT item_name FROM UNNEST(item_list))\n -- Remove the following line if you want the target_item to appear in the results\n AND target_item != IL.item_name\n GROUP BY\n item_name\n ORDER BY\n quantity DESC;\n\n### Average amount of money spent per purchase session by user\n\nThe following query shows the average amount of money spent per session by each\nuser. This takes into account only the sessions where the user made a purchase. \n\n -- Example: Average amount of money spent per purchase session by user.\n\n WITH\n events AS (\n SELECT\n session.value.int_value AS session_id,\n COALESCE(spend.value.int_value, spend.value.float_value, spend.value.double_value, 0.0)\n AS spend_value,\n event.*\n\n -- Replace table name\n FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*` AS event\n LEFT JOIN UNNEST(event.event_params) AS session\n ON session.key = 'ga_session_id'\n LEFT JOIN UNNEST(event.event_params) AS spend\n ON spend.key = 'value'\n\n -- Replace date range\n WHERE _TABLE_SUFFIX BETWEEN '20201101' AND '20210131'\n )\n SELECT\n user_pseudo_id,\n COUNT(DISTINCT session_id) AS session_count,\n SUM(spend_value) / COUNT(DISTINCT session_id) AS avg_spend_per_session_by_user\n FROM events\n WHERE event_name = 'purchase' and session_id IS NOT NULL\n GROUP BY user_pseudo_id\n\n### Latest Session Id and Session Number for users\n\nThe following query provides the list of the latest ga_session_id and\nga_session_number from last 4 days for a list of users. You can provide either a\n`user_pseudo_id` list or a `user_id` list. \n\n### user_pseudo_id\n\n -- Get the latest ga_session_id and ga_session_number for specific users during last 4 days.\n\n -- Replace timezone. List at https://en.wikipedia.org/wiki/List_of_tz_database_time_zones.\n DECLARE REPORTING_TIMEZONE STRING DEFAULT 'America/Los_Angeles';\n\n -- Replace list of user_pseudo_id's with ones you want to query.\n DECLARE USER_PSEUDO_ID_LIST ARRAY\u003cSTRING\u003e DEFAULT\n [\n '1005355938.1632145814', '979622592.1632496588', '1101478530.1632831095'];\n\n CREATE TEMP FUNCTION GetParamValue(params ANY TYPE, target_key STRING)\n AS (\n (SELECT `value` FROM UNNEST(params) WHERE key = target_key LIMIT 1)\n );\n\n CREATE TEMP FUNCTION GetDateSuffix(date_shift INT64, timezone STRING)\n AS (\n (SELECT FORMAT_DATE('%Y%m%d', DATE_ADD(CURRENT_DATE(timezone), INTERVAL date_shift DAY)))\n );\n\n SELECT DISTINCT\n user_pseudo_id,\n FIRST_VALUE(GetParamValue(event_params, 'ga_session_id').int_value)\n OVER (UserWindow) AS ga_session_id,\n FIRST_VALUE(GetParamValue(event_params, 'ga_session_number').int_value)\n OVER (UserWindow) AS ga_session_number\n FROM\n -- Replace table name.\n `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`\n WHERE\n user_pseudo_id IN UNNEST(USER_PSEUDO_ID_LIST)\n AND RIGHT(_TABLE_SUFFIX, 8)\n BETWEEN GetDateSuffix(-3, REPORTING_TIMEZONE)\n AND GetDateSuffix(0, REPORTING_TIMEZONE)\n WINDOW UserWindow AS (PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC);\n\n### user_id\n\n -- Get the latest ga_session_id and ga_session_number for specific users during last 4 days.\n\n -- Replace timezone. List at https://en.wikipedia.org/wiki/List_of_tz_database_time_zones.\n DECLARE REPORTING_TIMEZONE STRING DEFAULT 'America/Los_Angeles';\n\n -- Replace list of user_id's with ones you want to query.\n DECLARE USER_ID_LIST ARRAY\u003cSTRING\u003e DEFAULT ['\u003cuser_id_1\u003e', '\u003cuser_id_2\u003e', '\u003cuser_id_n\u003e'];\n\n CREATE TEMP FUNCTION GetParamValue(params ANY TYPE, target_key STRING)\n AS (\n (SELECT `value` FROM UNNEST(params) WHERE key = target_key LIMIT 1)\n );\n\n CREATE TEMP FUNCTION GetDateSuffix(date_shift INT64, timezone STRING)\n AS (\n (SELECT FORMAT_DATE('%Y%m%d', DATE_ADD(CURRENT_DATE(timezone), INTERVAL date_shift DAY)))\n );\n\n SELECT DISTINCT\n user_pseudo_id,\n FIRST_VALUE(GetParamValue(event_params, 'ga_session_id').int_value)\n OVER (UserWindow) AS ga_session_id,\n FIRST_VALUE(GetParamValue(event_params, 'ga_session_number').int_value)\n OVER (UserWindow) AS ga_session_number\n FROM\n -- Replace table name.\n `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`\n WHERE\n user_id IN UNNEST(USER_ID_LIST)\n AND RIGHT(_TABLE_SUFFIX, 8)\n BETWEEN GetDateSuffix(-3, REPORTING_TIMEZONE)\n AND GetDateSuffix(0, REPORTING_TIMEZONE)\n WINDOW UserWindow AS (PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC);"]]