diff --git a/lib/db/queries.go b/lib/db/queries.go index 1c81781d2f22b86d14f8086758611ba7f8072646..c0b715c322b9925bcea9084806740a76c3cfb4d4 100644 --- a/lib/db/queries.go +++ b/lib/db/queries.go @@ -42,10 +42,8 @@ func InsertShortURL(bucket, key, destURL string, associatedUser *string) error { key = "/" + key } result, err := DB.Exec(insertShortURL, - bucket+key, bucket, key, - key[1:], destURL, associatedUser) if err != nil { @@ -67,10 +65,8 @@ func InsertFile(bucket, key, ext, contentType string, contentLength int64, md5Ha key = "/" + key } result, err := DB.Exec(insertFile, - bucket+key+ext, bucket, key+ext, - key[1:], contentType, contentLength, md5Hash, @@ -93,7 +89,7 @@ func InsertFile(bucket, key, ext, contentType string, contentLength int64, md5Ha // exists with the given username or email address. func CheckUserExistsByUsernameOrEmail(username, email string) (bool, error) { var id string - err := DB.QueryRow(selectUserByUsernameOrEmail, strings.ToLower(username), email).Scan(&id) + err := DB.QueryRow(selectUserByUsernameOrEmail, username, email).Scan(&id) if err == sql.ErrNoRows { return false, nil } @@ -105,7 +101,7 @@ func CheckUserExistsByUsernameOrEmail(username, email string) (bool, error) { // InsertUser inserts a user into the database. func InsertUser(id, username, email string) error { - result, err := DB.Exec(insertUser, id, username, strings.ToLower(username), email) + result, err := DB.Exec(insertUser, id, username, email) if err != nil { return err } @@ -264,7 +260,7 @@ func CountObjectsByAssociatedUser(userID string, typ int) (int, error) { // GetObject returns an object. func GetObject(bucket, key string) (Object, error) { - row := DB.QueryRow(getObjectByBucketKey, fmt.Sprintf("%s/%s", bucket, key)) + row := DB.QueryRow(getObjectByBucketKey, bucket, key) return scanObject(row) } @@ -287,7 +283,7 @@ func UpdateObjectToTombstoneByBucketKey(bucket, key string, reason *string, reta sql = updateObjectToTombstoneKeepHashesAndAssociatedUserByBucketKey } } - result, err := DB.Exec(sql, reason, fmt.Sprintf("%s/%s", bucket, key)) + result, err := DB.Exec(sql, reason, bucket, key) if err != nil { return err } diff --git a/lib/db/sql.go b/lib/db/sql.go index 2ae87afe0ee7b987556cbbc76ad64640a781d4fe..b25970405ef9dcc2061f66c5353ff7afc47eccb1 100644 --- a/lib/db/sql.go +++ b/lib/db/sql.go @@ -9,10 +9,10 @@ SELECT is_blocked, bucket_capacity FROM - users u, - tokens t + user_ u, + legacy_token t WHERE - u.id = t.user_id AND + u.id = t.owned_by AND t.token = $1 LIMIT 1 ` @@ -21,46 +21,47 @@ var countOfObjectByBucketAndRandomKey = ` SELECT COUNT(*) as count FROM - objects + object WHERE bucket = $1 AND - random_key = $2 + enforce_unique_name AND + path_full_stem(path) = path_normalize('/' || $2) ` var insertShortURL = ` INSERT INTO - objects (bucket_key, bucket, key, random_key, dir, type, dest_url, content_type, associated_user) + object (bucket, path, location, owned_by, type, content_length, enforce_unique_name) VALUES - ($1, $2, $3, $4, '/', 1, $5, NULL, $6) + ($1, $2, $3, $4, 'redirect', -1, true) ` var insertFile = ` INSERT INTO - objects (bucket_key, bucket, key, random_key, dir, content_type, content_length, md5_hash, sha256_hash, associated_user) + object (bucket, path, content_type, content_length, md5_digest, sha256_digest, owned_by, type, enforce_unique_name) VALUES - ($1, $2, $3, $4, '/', $5, $6, $7, $8, $9) + ($1, $2, $3, $4, $5, $6, $7, 'file', true) ` var selectUserByUsernameOrEmail = ` SELECT id FROM - users + user_ WHERE - username_lower = $1 OR + username = $1 OR email = $2 ` var insertUser = ` INSERT INTO - users (id, username, username_lower, email) + user_ (id, username, email) VALUES - ($1, $2, $3, $4) + ($1, $2, $3) ` var insertToken = ` INSERT INTO - tokens (user_id, token) + legacy_token (owned_by, token) VALUES ($1, $2) ` @@ -69,57 +70,62 @@ var countObjectsByAssociatedUser = ` SELECT COUNT(*) as count FROM - objects + object WHERE - associated_user = $1 AND - "type" %v + owned_by = $1 AND + type %v::object_type ` var listObjectsByAssociatedUser = ` SELECT - bucket, - key, - dir, - "type", - dest_url, - content_type, - content_length, - created_at, - deleted_at, - delete_reason, - md5_hash, - sha256_hash, - associated_user + b.name, + o.path, + path_directory(o.path), + o.type::integer, + o.location, + o.content_type, + v1.content_length(o.content_length), + o.created_at, + o.tombstoned_at, + o.tombstone_reason, + o.md5_digest, + o.sha256_digest, + o.owned_by FROM - objects + object o +INNER JOIN + bucket b ON o.bucket = b.name WHERE - associated_user = $1 AND - "type" %v + o.owned_by = $1 AND + o.type %v::object_type ORDER BY - created_at %s + o.created_at %s LIMIT $2 OFFSET $3 ` var getObjectByBucketKey = ` SELECT - bucket, - key, - dir, - "type", - dest_url, - content_type, - content_length, - created_at, - deleted_at, - delete_reason, - md5_hash, - sha256_hash, - associated_user + b.name, + o.path, + path_directory(o.path), + o.type::integer, + o.location, + o.content_type, + v1.content_length(o.content_length), + o.created_at, + o.tombstoned_at, + o.tombstone_reason, + o.md5_digest, + o.sha256_digest, + o.owned_by FROM - objects + object o +INNER JOIN + bucket b ON o.bucket = b.name WHERE - bucket_key = $1 + o.bucket = $1 AND + o.path = path_normalize('/' || $2) LIMIT 1 ` @@ -127,150 +133,154 @@ var countOfObjectsBySHA256 = ` SELECT COUNT(*) FROM - objects + object WHERE - "type" = 0 AND - sha256_hash = $1 + type = 'file' AND + sha256_digest = $1 ` var updateObjectToTombstoneByBucketKey = ` UPDATE - objects + object SET - type = 2, - deleted_at = CURRENT_TIMESTAMP, - dest_url = NULL, + type = 'tombstone', + tombstoned_at = now(), + location = NULL, content_type = NULL, - content_length = NULL, - delete_reason = $1, - md5_hash = NULL, - sha256_hash = NULL, - associated_user = NULL + content_length = -1, + tombstone_reason = $1, + md5_digest = NULL, + sha256_digest = NULL, + owned_by = NULL WHERE - bucket_key = $2 + bucket = $2 AND + path = path_normalize('/' || $3) ` var updateObjectToTombstoneKeepAssociatedUserByBucketKey = ` UPDATE - objects + object SET - type = 2, - deleted_at = CURRENT_TIMESTAMP, - dest_url = NULL, + type = 'tombstone', + tombstoned_at = now(), + location = NULL, content_type = NULL, - content_length = NULL, - delete_reason = $1, - md5_hash = NULL, - sha256_hash = NULL + content_length = -1, + tombstone_reason = $1, + md5_digest = NULL, + sha256_digest = NULL WHERE - bucket_key = $2 + bucket = $2 AND + path = path_normalize('/' || $3) ` var updateObjectToTombstoneKeepHashesAndAssociatedUserByBucketKey = ` UPDATE - objects + object SET - type = 2, - deleted_at = CURRENT_TIMESTAMP, - dest_url = NULL, + type = 'tombstone', + tombstoned_at = now(), + location = NULL, content_type = NULL, - content_length = NULL, - delete_reason = $1 + content_length = -1, + tombstone_reason = $1 WHERE - bucket_key = $2 + bucket = $2 AND + path = path_normalize('/' || $3) ` var updateObjectToTombstoneBySHA256Hash = ` UPDATE - objects + object SET - type = 2, - deleted_at = CURRENT_TIMESTAMP, - dest_url = NULL, + type = 'tombstone', + tombstoned_at = now(), + location = NULL, content_type = NULL, - content_length = NULL, - delete_reason = $1, - md5_hash = NULL, - sha256_hash = NULL, - associated_user = NULL + content_length = -1, + tombstone_reason = $1, + md5_digest = NULL, + sha256_digest = NULL, + owned_by = NULL WHERE - sha256_hash = $2 + sha256_digest = $2 ` var updateObjectToTombstoneKeepAssociatedUserBySHA256Hash = ` UPDATE - objects + object SET - type = 2, - deleted_at = CURRENT_TIMESTAMP, - dest_url = NULL, + type = 'tombstone', + tombstoned_at = now(), + location = NULL, content_type = NULL, - content_length = NULL, - delete_reason = $1, - md5_hash = NULL, - sha256_hash = NULL + content_length = -1, + tombstone_reason = $1, + md5_digest = NULL, + sha256_digest = NULL WHERE - sha256_hash = $2 + sha256_digest = $2 ` var updateObjectToTombstoneKeepHashesAndAssociatedUserBySHA256Hash = ` UPDATE - objects + object SET - type = 2, - deleted_at = CURRENT_TIMESTAMP, - dest_url = NULL, + type = 'tombstone', + tombstoned_at = now(), + location = NULL, content_type = NULL, - content_length = NULL, - delete_reason = $1 + content_length = -1, + tombstone_reason = $1 WHERE - sha256_hash = $2 + sha256_digest = $2 ` var countOfFileBanBySHA256 = ` SELECT COUNT(*) as count FROM - file_bans + file_ban WHERE - sha256_hash = $1 + sha256_digest = $1 ` var insertFileBan = ` INSERT INTO - file_bans (sha256_hash, did_quarantine, reason, description, malware_name) + file_ban (sha256_digest, did_quarantine, reason, description, malware_name) VALUES - ($1, $2, $3, $4, $5) + -- FIXME(auri): why is the cast to integer necessary??? + ($1, $2, $3::integer, $4, $5) ` var listBannedFiles = ` SELECT - sha256_hash, + sha256_digest, did_quarantine, - reason, + reason::integer, description, malware_name FROM - file_bans + file_ban ` var getBannedFileBySHA256Hash = ` SELECT - sha256_hash, + sha256_digest, did_quarantine, - reason, + reason::integer, description, malware_name FROM - file_bans + file_ban WHERE - sha256_hash = $1 + sha256_digest = $1 LIMIT 1 ` var deleteFileBan = ` DELETE FROM - file_bans + file_ban WHERE - sha256_hash = $1 + sha256_digest = $1 ` diff --git a/sql/README b/sql/README index 82d2fdfb244f9f0f400e0a911bc229bc5b60f139..35c9a7ee2781f501ddd0dfada18c108f89816b76 100644 --- a/sql/README +++ b/sql/README @@ -4,3 +4,5 @@ for use with the API server and cdn-origin. Reference: - v1.sql: the raw, unprocessed output of pg_dump --no-owner --no-privileges --schema-only over the main owo database, version 1. +- v1tov2.sql: a migration script that converts a version 1 database to a + version 2 database. diff --git a/sql/v1tov2.sql b/sql/v1tov2.sql new file mode 100644 index 0000000000000000000000000000000000000000..e4701e6d68cdd2ce272e05c53f3ae5e076f615dc --- /dev/null +++ b/sql/v1tov2.sql @@ -0,0 +1,276 @@ +SET LOCAL search_path = public; + +-- API v1 compatibility helpers. To be dropped when API server v1 is retired. +CREATE SCHEMA v1; + +CREATE TYPE object_type AS ENUM ('file', 'redirect', 'tombstone'); + +CREATE TYPE file_ban_reason AS ENUM ('other', 'malware', 'takedown'); + +-- RFC 3696 Section 3 Errata 1690. +CREATE DOMAIN emailaddr AS character varying(254); + +-- RFC 6838 recommends 64 characters as a sane limit for each of the two +-- restricted-names (type-name "/" subtype-name). An extra character is +-- required for the "/" delimiter, bringing the total up to 129: 127 characters +-- are left to be used by longer media types, parameters and their values and +-- similar oddities. +CREATE DOMAIN mediatype AS character varying(256); + +CREATE DOMAIN sha256 AS bytea CHECK (octet_length(VALUE) = 32); + +CREATE DOMAIN md5 AS bytea CHECK (octet_length(VALUE) = 16); + +-- ...a reasonable limit? +CREATE DOMAIN url AS character varying(4096); + +CREATE DOMAIN object_path AS character varying(4096); + +-- Given '//foo/bar//baz', return '/foo/bar/baz'. +CREATE FUNCTION path_normalize(object_path) RETURNS object_path + IMMUTABLE STRICT + LANGUAGE SQL + AS $$SELECT regexp_replace($1, '/+', '/', 'g');$$; + +-- Given '/foo/bar.baz.qux', return '/foo/bar'. +-- See bucket.enforce_unique_names and object.enforce_unique_name. +CREATE FUNCTION path_full_stem(object_path) RETURNS object_path + IMMUTABLE STRICT + LANGUAGE SQL + AS $$SELECT split_part(path_normalize($1), '.', 1);$$; + +-- Given '/foo/bar/baz', return '/foo/bar'. +-- Given '/', return '/'. +-- Given 'foo', return ''. +CREATE FUNCTION path_directory(object_path) RETURNS object_path + IMMUTABLE STRICT + LANGUAGE plpgsql + AS $$ + DECLARE + l integer = length($1); + i integer; + head object_path = ''; + BEGIN + FOR i IN REVERSE l..1 LOOP + IF starts_with(substr($1, i), '/') THEN + head := substr($1, 0, i + 1); + EXIT; + END IF; + END LOOP; + + IF head != '' AND head != repeat('/', length(head)) THEN + head := rtrim(head, '/'); + END IF; + + RETURN head; + END + $$; + +CREATE FUNCTION v1.content_length(bigint) RETURNS bigint + IMMUTABLE STRICT + LANGUAGE SQL + AS $$SELECT (CASE WHEN $1 < 0 THEN NULL ELSE $1 END);$$; + +CREATE FUNCTION v1.object_type(integer) RETURNS object_type + IMMUTABLE STRICT + LANGUAGE SQL + AS $$SELECT (CASE $1 + WHEN 0 THEN 'file' + WHEN 1 THEN 'redirect' + WHEN 2 THEN 'tombstone' + END)::object_type;$$; + +CREATE FUNCTION v1.as_api_integer(object_type) RETURNS integer + IMMUTABLE STRICT + LANGUAGE SQL + AS $$SELECT (CASE $1 + WHEN 'file' THEN 0 + WHEN 'redirect' THEN 1 + WHEN 'tombstone' THEN 2 + END);$$; + +CREATE FUNCTION v1.file_ban_reason(integer) RETURNS file_ban_reason + IMMUTABLE STRICT + LANGUAGE SQL + AS $$SELECT (CASE $1 + WHEN 0 THEN 'other' + WHEN 1 THEN 'malware' + WHEN 2 THEN 'takedown' + WHEN 3 THEN 'other' -- previously tos_violation + END)::file_ban_reason;$$; + +CREATE FUNCTION v1.as_api_integer(file_ban_reason) RETURNS integer + IMMUTABLE STRICT + LANGUAGE SQL + AS $$SELECT (CASE $1 + WHEN 'other' THEN 0 + WHEN 'malware' THEN 1 + WHEN 'takedown' THEN 2 + END);$$; + +CREATE CAST (integer AS object_type) + WITH FUNCTION v1.object_type + AS ASSIGNMENT; + +CREATE CAST (object_type AS integer) + WITH FUNCTION v1.as_api_integer(object_type); + +CREATE CAST (integer AS file_ban_reason) + WITH FUNCTION v1.file_ban_reason + AS ASSIGNMENT; + +CREATE CAST (file_ban_reason AS integer) + WITH FUNCTION v1.as_api_integer(file_ban_reason); + +CREATE COLLATION "und-u-ks-level2-x-icu" ( + locale = 'und@colStrength=secondary', + provider = 'icu', + deterministic = false +); + +ALTER TABLE users RENAME TO user_; +ALTER TABLE user_ RENAME CONSTRAINT users_pk TO user__pk; +ALTER TABLE user_ RENAME CONSTRAINT users_email_key TO user__email_key; + +ALTER TABLE user_ + -- Replaced by the ICU collation on username and user__username_key. + DROP COLUMN username_lower, + -- Ditto. + DROP CONSTRAINT users_username_key, + ALTER COLUMN id SET DATA TYPE uuid USING id::uuid, + ALTER COLUMN id SET DEFAULT gen_random_uuid(), + ALTER COLUMN username SET DATA TYPE character varying(256) + COLLATE "und-u-ks-level2-x-icu", + ALTER COLUMN email SET DATA TYPE emailaddr, + ADD COLUMN created_on timestamp with time zone NOT NULL DEFAULT now(), + ADD UNIQUE (username); + +INSERT INTO user_ (id, username, email, is_admin, is_blocked, created_on) +VALUES ('d4d422b7-61b6-4e25-b1fd-e4197dd0dd1b', + 'owo', + 'system@whats-th.is', + true, + false, + '2016-12-10T04:00:34Z'); + +ALTER TABLE tokens RENAME TO legacy_token; +ALTER TABLE legacy_token RENAME COLUMN user_id TO owned_by; + +ALTER TABLE legacy_token + DROP COLUMN id, + DROP CONSTRAINT tokens_token_key, + ALTER COLUMN owned_by SET DATA TYPE uuid USING owned_by::uuid, + ADD PRIMARY KEY (token), + ADD FOREIGN KEY (owned_by) REFERENCES user_ (id); + +CREATE TABLE bucket ( + name character varying(32) PRIMARY KEY COLLATE "und-u-ks-level2-x-icu", + owned_by uuid NOT NULL REFERENCES user_ (id), + created_at timestamp with time zone NOT NULL DEFAULT now(), + enforce_unique_names boolean NOT NULL, + -- Needed to make the FK constraint on object work. + UNIQUE (name, enforce_unique_names) +); + +INSERT INTO bucket (name, owned_by, created_at, enforce_unique_names) +VALUES ('public', + 'd4d422b7-61b6-4e25-b1fd-e4197dd0dd1b', -- owo + '2016-12-10T04:00:34Z', + true); + +ALTER TABLE objects RENAME TO object; +ALTER TABLE object RENAME COLUMN dest_url TO location; +ALTER TABLE object RENAME COLUMN sha256_hash TO sha256_digest; +ALTER TABLE object RENAME COLUMN md5_hash TO md5_digest; +ALTER TABLE object RENAME COLUMN associated_user TO owned_by; +ALTER TABLE object RENAME COLUMN deleted_at TO tombstoned_at; +ALTER TABLE object RENAME COLUMN delete_reason TO tombstone_reason; +ALTER INDEX objects_associated_user RENAME TO object_owned_by_idx; +ALTER INDEX objects_sha256_hash_idx RENAME TO object_sha256_digest_idx; + +-- Prepare for ALTER COLUMN content_length SET NOT NULL. +UPDATE object + SET content_length = -1 + WHERE content_length IS NULL; + +ALTER TABLE object + -- Unused. + DROP COLUMN backend_file_id, + ALTER COLUMN type DROP DEFAULT, + ALTER COLUMN type SET DATA TYPE object_type, + ALTER COLUMN content_type DROP DEFAULT, + ALTER COLUMN content_type SET DATA TYPE mediatype, + -- Previously objects could be up to 2GB (and change) in size. bigint + -- should afford us about 9EB :) + ALTER COLUMN content_length SET DATA TYPE bigint, + ALTER COLUMN content_length SET NOT NULL, + ALTER COLUMN sha256_digest SET DATA TYPE sha256, + ALTER COLUMN md5_digest SET DATA TYPE md5, + ALTER COLUMN location DROP DEFAULT, + ALTER COLUMN location SET DATA TYPE url, + ALTER COLUMN owned_by DROP DEFAULT, + ALTER COLUMN owned_by SET DATA TYPE uuid USING owned_by::uuid, + -- Casting timestamp to timestamp with time zone is equivalent to: + -- USING (created_at as time zone current_setting('timezone')) + ALTER COLUMN created_at SET DATA TYPE timestamp with time zone, + ALTER COLUMN tombstoned_at SET DATA TYPE timestamp with time zone, + -- DEFAULT NULL is implied on nullable columns. + ALTER COLUMN tombstone_reason DROP DEFAULT, + ADD COLUMN id uuid DEFAULT gen_random_uuid() PRIMARY KEY, + ADD FOREIGN KEY (owned_by) REFERENCES user_ (id), + ADD CHECK (content_length >= -1), + ADD CONSTRAINT object_is_valid_file_check + CHECK (type != 'file' OR content_type IS NOT NULL + AND content_length >= 0 + AND sha256_digest IS NOT NULL + AND md5_digest IS NOT NULL), + ADD CONSTRAINT object_is_valid_redirect_check + CHECK (type != 'redirect' OR location IS NOT NULL), + ADD CONSTRAINT object_is_valid_tombstone_check + CHECK (type != 'tombstone' OR tombstoned_at IS NOT NULL); + +-- Unify bucket_key, bucket, key, dir and random_key. +ALTER TABLE object + ALTER COLUMN bucket SET DATA TYPE character varying(32), + ADD COLUMN path object_path NOT NULL + GENERATED ALWAYS AS (path_normalize(dir || '/' || key)) STORED, + ADD COLUMN enforce_unique_name boolean, + ADD FOREIGN KEY (bucket, enforce_unique_name) + REFERENCES bucket (name, enforce_unique_names); + +-- Finalize the migration to bucket/path: this has to be done in two steps due +-- to how GENERATED ... AS expressions work. +ALTER TABLE object + ALTER COLUMN path DROP EXPRESSION, + DROP COLUMN bucket_key, + DROP COLUMN key, + DROP COLUMN dir, + DROP COLUMN random_key; + +CREATE UNIQUE INDEX object_bucket_path_key + ON object (bucket, path); +CREATE UNIQUE INDEX object_path_full_stem_path_key + ON object (path_full_stem(path)) + WHERE enforce_unique_name; + +CREATE FUNCTION _object_before_insert_normalize_path() RETURNS trigger + LANGUAGE plpgsql + AS $$BEGIN NEW.path = path_normalize(NEW.path); RETURN NEW; END$$; +CREATE TRIGGER object_before_insert_normalize_path_tg + BEFORE INSERT ON object + FOR EACH ROW EXECUTE FUNCTION _object_before_insert_normalize_path(); + +ALTER TABLE file_bans RENAME TO file_ban; +ALTER TABLE file_ban RENAME COLUMN sha256_hash TO sha256_digest; +ALTER TABLE file_ban RENAME CONSTRAINT file_bans_sha256_hash_key TO file_ban_sha256_digest_key; + +ALTER TABLE file_ban + ALTER COLUMN did_quarantine DROP DEFAULT, + ALTER COLUMN reason DROP DEFAULT, + ALTER COLUMN reason SET DATA TYPE file_ban_reason, + ALTER COLUMN sha256_digest SET DATA TYPE sha256, + -- DEFAULT NULL is implied on nullable columns. + ALTER COLUMN description DROP DEFAULT, + ALTER COLUMN malware_name DROP DEFAULT, + ADD CONSTRAINT file_ban_is_valid_malware + CHECK (reason != 'malware' OR malware_name IS NOT NULL);